datasketches 0.2.7 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (86) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/datasketches/kll_wrapper.cpp +20 -20
  4. data/ext/datasketches/theta_wrapper.cpp +2 -2
  5. data/lib/datasketches/version.rb +1 -1
  6. data/vendor/datasketches-cpp/CMakeLists.txt +9 -1
  7. data/vendor/datasketches-cpp/MANIFEST.in +21 -2
  8. data/vendor/datasketches-cpp/common/CMakeLists.txt +5 -2
  9. data/vendor/datasketches-cpp/common/include/common_defs.hpp +10 -0
  10. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp +6 -6
  11. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +1 -0
  12. data/vendor/datasketches-cpp/common/include/{quantile_sketch_sorted_view.hpp → quantiles_sorted_view.hpp} +60 -25
  13. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +125 -0
  14. data/vendor/datasketches-cpp/common/include/version.hpp.in +36 -0
  15. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +25 -6
  16. data/vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp +459 -0
  17. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -1
  18. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +28 -44
  19. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +70 -78
  20. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +11 -4
  21. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +16 -9
  22. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +1 -1
  23. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +54 -41
  24. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
  25. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +2 -2
  26. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +1 -1
  27. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -32
  28. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +176 -233
  29. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +337 -395
  30. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -1
  31. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +26 -26
  32. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +196 -232
  33. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +41 -31
  34. data/vendor/datasketches-cpp/pyproject.toml +17 -12
  35. data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
  36. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +104 -0
  37. data/vendor/datasketches-cpp/python/datasketches/__init__.py +22 -0
  38. data/vendor/datasketches-cpp/python/include/py_serde.hpp +113 -0
  39. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +31 -24
  40. data/vendor/datasketches-cpp/python/pybind11Path.cmd +18 -0
  41. data/vendor/datasketches-cpp/python/src/__init__.py +17 -1
  42. data/vendor/datasketches-cpp/python/src/datasketches.cpp +9 -3
  43. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +18 -54
  44. data/vendor/datasketches-cpp/python/src/py_serde.cpp +111 -0
  45. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +17 -53
  46. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +17 -55
  47. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +62 -67
  48. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +47 -14
  49. data/vendor/datasketches-cpp/python/tests/__init__.py +16 -0
  50. data/vendor/datasketches-cpp/python/tests/req_test.py +1 -1
  51. data/vendor/datasketches-cpp/python/tests/vo_test.py +25 -1
  52. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +135 -180
  53. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +205 -210
  54. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +1 -1
  55. data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +19 -18
  56. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +240 -232
  57. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +15 -9
  58. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +35 -19
  59. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +126 -147
  60. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +265 -245
  61. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +1 -1
  62. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +26 -26
  63. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +116 -103
  64. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +22 -46
  65. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +180 -207
  66. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +18 -39
  67. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +75 -85
  68. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -1
  69. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +6 -6
  70. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +2 -2
  71. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +4 -4
  72. data/vendor/datasketches-cpp/setup.py +14 -2
  73. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +15 -25
  74. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +0 -9
  75. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -5
  76. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -1
  77. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +2 -1
  78. data/vendor/datasketches-cpp/tox.ini +26 -0
  79. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +36 -12
  80. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +16 -4
  81. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +2 -1
  82. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +299 -0
  83. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +26 -0
  84. data/vendor/datasketches-cpp/version.cfg.in +1 -0
  85. metadata +14 -5
  86. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +0 -91
@@ -47,7 +47,6 @@ template<typename A> using AllocU8 = typename std::allocator_traits<A>::template
47
47
  */
48
48
  template<
49
49
  typename T,
50
- typename S = serde<T>, // deprecated, to be removed in the next major version
51
50
  typename A = std::allocator<T>
52
51
  >
53
52
  class var_opt_union {
@@ -69,20 +68,20 @@ public:
69
68
  * This method takes an lvalue.
70
69
  * @param sk a sketch to add to the union
71
70
  */
72
- void update(const var_opt_sketch<T,S,A>& sk);
71
+ void update(const var_opt_sketch<T, A>& sk);
73
72
 
74
73
  /**
75
74
  * Updates this union with the given sketch
76
75
  * This method takes an rvalue.
77
76
  * @param sk a sketch to add to the union
78
77
  */
79
- void update(var_opt_sketch<T,S,A>&& sk);
78
+ void update(var_opt_sketch<T, A>&& sk);
80
79
 
81
80
  /**
82
81
  * Gets the varopt sketch resulting from the union of any input sketches.
83
82
  * @return a varopt sketch
84
83
  */
85
- var_opt_sketch<T,S,A> get_result() const;
84
+ var_opt_sketch<T, A> get_result() const;
86
85
 
87
86
  /**
88
87
  * Resets the union to its default, empty state.
@@ -95,7 +94,7 @@ public:
95
94
  * @param instance of a SerDe
96
95
  * @return size in bytes needed to serialize this sketch
97
96
  */
98
- template<typename SerDe = S>
97
+ template<typename SerDe = serde<T>>
99
98
  size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
100
99
 
101
100
  // This is a convenience alias for users
@@ -111,7 +110,7 @@ public:
111
110
  * @param header_size_bytes space to reserve in front of the sketch
112
111
  * @param instance of a SerDe
113
112
  */
114
- template<typename SerDe = S>
113
+ template<typename SerDe = serde<T>>
115
114
  vector_bytes serialize(unsigned header_size_bytes = 0, const SerDe& sd = SerDe()) const;
116
115
 
117
116
  /**
@@ -120,18 +119,9 @@ public:
120
119
  * @param os output stream
121
120
  * @param instance of a SerDe
122
121
  */
123
- template<typename SerDe = S>
122
+ template<typename SerDe = serde<T>>
124
123
  void serialize(std::ostream& os, const SerDe& sd = SerDe()) const;
125
124
 
126
- /**
127
- * NOTE: This method may be deprecated in a future version.
128
- * This method deserializes a union from a given stream.
129
- * @param is input stream
130
- * @param instance of an Allocator
131
- * @return an instance of a union
132
- */
133
- static var_opt_union deserialize(std::istream& is, const A& allocator = A());
134
-
135
125
  /**
136
126
  * NOTE: This method may be deprecated in a future version.
137
127
  * This method deserializes a union from a given stream.
@@ -140,19 +130,9 @@ public:
140
130
  * @param instance of an Allocator
141
131
  * @return an instance of a union
142
132
  */
143
- template<typename SerDe = S>
133
+ template<typename SerDe = serde<T>>
144
134
  static var_opt_union deserialize(std::istream& is, const SerDe& sd = SerDe(), const A& allocator = A());
145
135
 
146
- /**
147
- * NOTE: This method may be deprecated in a future version.
148
- * This method deserializes a union from a given array of bytes.
149
- * @param bytes pointer to the array of bytes
150
- * @param size the size of the array
151
- * @param instance of an Allocator
152
- * @return an instance of a union
153
- */
154
- static var_opt_union deserialize(const void* bytes, size_t size, const A& allocator = A());
155
-
156
136
  /**
157
137
  * NOTE: This method may be deprecated in a future version.
158
138
  * This method deserializes a union from a given array of bytes.
@@ -162,7 +142,7 @@ public:
162
142
  * @param instance of an Allocator
163
143
  * @return an instance of a union
164
144
  */
165
- template<typename SerDe = S>
145
+ template<typename SerDe = serde<T>>
166
146
  static var_opt_union deserialize(const void* bytes, size_t size, const SerDe& sd = SerDe(), const A& allocator = A());
167
147
 
168
148
  /**
@@ -171,9 +151,8 @@ public:
171
151
  */
172
152
  string<A> to_string() const;
173
153
 
174
-
175
154
  private:
176
- typedef typename std::allocator_traits<A>::template rebind_alloc<var_opt_sketch<T,S,A>> AllocSketch;
155
+ typedef typename std::allocator_traits<A>::template rebind_alloc<var_opt_sketch<T, A>> AllocSketch;
177
156
 
178
157
  static const uint8_t PREAMBLE_LONGS_EMPTY = 1;
179
158
  static const uint8_t PREAMBLE_LONGS_NON_EMPTY = 4;
@@ -191,10 +170,10 @@ private:
191
170
 
192
171
  uint32_t max_k_;
193
172
 
194
- var_opt_sketch<T,S,A> gadget_;
173
+ var_opt_sketch<T, A> gadget_;
195
174
 
196
175
  var_opt_union(uint64_t n, double outer_tau_numer, uint64_t outer_tau_denom,
197
- uint32_t max_k, var_opt_sketch<T,S,A>&& gadget);
176
+ uint32_t max_k, var_opt_sketch<T, A>&& gadget);
198
177
 
199
178
  /*
200
179
  IMPORTANT NOTE: the "gadget" in the union object appears to be a varopt sketch,
@@ -250,18 +229,18 @@ private:
250
229
  more importantly, this design choice allows us to exactly re-construct the input sketch
251
230
  when there is only one of them.
252
231
  */
253
- inline void merge_items(const var_opt_sketch<T,S,A>& sk);
254
- inline void merge_items(var_opt_sketch<T,S,A>&& sk);
255
- inline void resolve_tau(const var_opt_sketch<T,S,A>& sketch);
232
+ inline void merge_items(const var_opt_sketch<T, A>& sk);
233
+ inline void merge_items(var_opt_sketch<T, A>&& sk);
234
+ inline void resolve_tau(const var_opt_sketch<T, A>& sketch);
256
235
 
257
236
  double get_outer_tau() const;
258
237
 
259
- var_opt_sketch<T,S,A> simple_gadget_coercer() const;
238
+ var_opt_sketch<T, A> simple_gadget_coercer() const;
260
239
 
261
240
  bool there_exist_unmarked_h_items_lighter_than_target(double threshold) const;
262
- bool detect_and_handle_subcase_of_pseudo_exact(var_opt_sketch<T,S,A>& sk) const;
263
- void mark_moving_gadget_coercer(var_opt_sketch<T,S,A>& sk) const;
264
- void migrate_marked_items_by_decreasing_k(var_opt_sketch<T,S,A>& sk) const;
241
+ bool detect_and_handle_subcase_of_pseudo_exact(var_opt_sketch<T, A>& sk) const;
242
+ void mark_moving_gadget_coercer(var_opt_sketch<T, A>& sk) const;
243
+ void migrate_marked_items_by_decreasing_k(var_opt_sketch<T, A>& sk) const;
265
244
 
266
245
  static void check_preamble_longs(uint8_t preamble_longs, uint8_t flags);
267
246
  static void check_family_and_serialization_version(uint8_t family_id, uint8_t ser_ver);
@@ -28,17 +28,17 @@
28
28
 
29
29
  namespace datasketches {
30
30
 
31
- template<typename T, typename S, typename A>
32
- var_opt_union<T,S,A>::var_opt_union(uint32_t max_k, const A& allocator) :
31
+ template<typename T, typename A>
32
+ var_opt_union<T, A>::var_opt_union(uint32_t max_k, const A& allocator) :
33
33
  n_(0),
34
34
  outer_tau_numer_(0.0),
35
35
  outer_tau_denom_(0),
36
36
  max_k_(max_k),
37
- gadget_(max_k, var_opt_sketch<T,S,A>::DEFAULT_RESIZE_FACTOR, true, allocator)
37
+ gadget_(max_k, var_opt_sketch<T, A>::DEFAULT_RESIZE_FACTOR, true, allocator)
38
38
  {}
39
39
 
40
- template<typename T, typename S, typename A>
41
- var_opt_union<T,S,A>::var_opt_union(const var_opt_union& other) :
40
+ template<typename T, typename A>
41
+ var_opt_union<T, A>::var_opt_union(const var_opt_union& other) :
42
42
  n_(other.n_),
43
43
  outer_tau_numer_(other.outer_tau_numer_),
44
44
  outer_tau_denom_(other.outer_tau_denom_),
@@ -46,8 +46,8 @@ var_opt_union<T,S,A>::var_opt_union(const var_opt_union& other) :
46
46
  gadget_(other.gadget_)
47
47
  {}
48
48
 
49
- template<typename T, typename S, typename A>
50
- var_opt_union<T,S,A>::var_opt_union(var_opt_union&& other) noexcept :
49
+ template<typename T, typename A>
50
+ var_opt_union<T, A>::var_opt_union(var_opt_union&& other) noexcept :
51
51
  n_(other.n_),
52
52
  outer_tau_numer_(other.outer_tau_numer_),
53
53
  outer_tau_denom_(other.outer_tau_denom_),
@@ -55,9 +55,9 @@ var_opt_union<T,S,A>::var_opt_union(var_opt_union&& other) noexcept :
55
55
  gadget_(std::move(other.gadget_))
56
56
  {}
57
57
 
58
- template<typename T, typename S, typename A>
59
- var_opt_union<T,S,A>::var_opt_union(uint64_t n, double outer_tau_numer, uint64_t outer_tau_denom,
60
- uint32_t max_k, var_opt_sketch<T,S,A>&& gadget) :
58
+ template<typename T, typename A>
59
+ var_opt_union<T, A>::var_opt_union(uint64_t n, double outer_tau_numer, uint64_t outer_tau_denom,
60
+ uint32_t max_k, var_opt_sketch<T, A>&& gadget) :
61
61
  n_(n),
62
62
  outer_tau_numer_(outer_tau_numer),
63
63
  outer_tau_denom_(outer_tau_denom),
@@ -65,12 +65,12 @@ var_opt_union<T,S,A>::var_opt_union(uint64_t n, double outer_tau_numer, uint64_t
65
65
  gadget_(gadget)
66
66
  {}
67
67
 
68
- template<typename T, typename S, typename A>
69
- var_opt_union<T,S,A>::~var_opt_union() {}
68
+ template<typename T, typename A>
69
+ var_opt_union<T, A>::~var_opt_union() {}
70
70
 
71
- template<typename T, typename S, typename A>
72
- var_opt_union<T,S,A>& var_opt_union<T,S,A>::operator=(const var_opt_union& other) {
73
- var_opt_union<T,S,A> union_copy(other);
71
+ template<typename T, typename A>
72
+ var_opt_union<T, A>& var_opt_union<T, A>::operator=(const var_opt_union& other) {
73
+ var_opt_union union_copy(other);
74
74
  std::swap(n_, union_copy.n_);
75
75
  std::swap(outer_tau_numer_, union_copy.outer_tau_numer_);
76
76
  std::swap(outer_tau_denom_, union_copy.outer_tau_denom_);
@@ -79,8 +79,8 @@ var_opt_union<T,S,A>& var_opt_union<T,S,A>::operator=(const var_opt_union& other
79
79
  return *this;
80
80
  }
81
81
 
82
- template<typename T, typename S, typename A>
83
- var_opt_union<T,S,A>& var_opt_union<T,S,A>::operator=(var_opt_union&& other) {
82
+ template<typename T, typename A>
83
+ var_opt_union<T, A>& var_opt_union<T, A>::operator=(var_opt_union&& other) {
84
84
  std::swap(n_, other.n_);
85
85
  std::swap(outer_tau_numer_, other.outer_tau_numer_);
86
86
  std::swap(outer_tau_denom_, other.outer_tau_denom_);
@@ -128,14 +128,9 @@ var_opt_union<T,S,A>& var_opt_union<T,S,A>::operator=(var_opt_union&& other) {
128
128
  * </pre>
129
129
  */
130
130
 
131
- template<typename T, typename S, typename A>
132
- var_opt_union<T,S,A> var_opt_union<T,S,A>::deserialize(std::istream& is, const A& allocator) {
133
- return deserialize(is, S(), allocator);
134
- }
135
-
136
- template<typename T, typename S, typename A>
131
+ template<typename T, typename A>
137
132
  template<typename SerDe>
138
- var_opt_union<T,S,A> var_opt_union<T,S,A>::deserialize(std::istream& is, const SerDe& sd, const A& allocator) {
133
+ var_opt_union<T, A> var_opt_union<T, A>::deserialize(std::istream& is, const SerDe& sd, const A& allocator) {
139
134
  const auto preamble_longs = read<uint8_t>(is);
140
135
  const auto serial_version = read<uint8_t>(is);
141
136
  const auto family_id = read<uint8_t>(is);
@@ -155,29 +150,24 @@ var_opt_union<T,S,A> var_opt_union<T,S,A>::deserialize(std::istream& is, const S
155
150
  if (!is.good())
156
151
  throw std::runtime_error("error reading from std::istream");
157
152
  else
158
- return var_opt_union<T,S,A>(max_k);
153
+ return var_opt_union(max_k);
159
154
  }
160
155
 
161
156
  const auto items_seen = read<uint64_t>(is);
162
157
  const auto outer_tau_numer = read<double>(is);
163
158
  const auto outer_tau_denom = read<uint64_t>(is);
164
159
 
165
- var_opt_sketch<T,S,A> gadget = var_opt_sketch<T,S,A>::deserialize(is, sd, allocator);
160
+ var_opt_sketch<T, A> gadget = var_opt_sketch<T, A>::deserialize(is, sd, allocator);
166
161
 
167
162
  if (!is.good())
168
163
  throw std::runtime_error("error reading from std::istream");
169
164
 
170
- return var_opt_union<T,S,A>(items_seen, outer_tau_numer, outer_tau_denom, max_k, std::move(gadget));
171
- }
172
-
173
- template<typename T, typename S, typename A>
174
- var_opt_union<T,S,A> var_opt_union<T,S,A>::deserialize(const void* bytes, size_t size, const A& allocator) {
175
- return deserialize(bytes, size, S(), allocator);
165
+ return var_opt_union(items_seen, outer_tau_numer, outer_tau_denom, max_k, std::move(gadget));
176
166
  }
177
167
 
178
- template<typename T, typename S, typename A>
168
+ template<typename T, typename A>
179
169
  template<typename SerDe>
180
- var_opt_union<T,S,A> var_opt_union<T,S,A>::deserialize(const void* bytes, size_t size, const SerDe& sd, const A& allocator) {
170
+ var_opt_union<T, A> var_opt_union<T, A>::deserialize(const void* bytes, size_t size, const SerDe& sd, const A& allocator) {
181
171
  ensure_minimum_memory(size, 8);
182
172
  const char* ptr = static_cast<const char*>(bytes);
183
173
  uint8_t preamble_longs;
@@ -201,7 +191,7 @@ var_opt_union<T,S,A> var_opt_union<T,S,A>::deserialize(const void* bytes, size_t
201
191
  bool is_empty = flags & EMPTY_FLAG_MASK;
202
192
 
203
193
  if (is_empty) {
204
- return var_opt_union<T,S,A>(max_k);
194
+ return var_opt_union(max_k);
205
195
  }
206
196
 
207
197
  uint64_t items_seen;
@@ -212,14 +202,14 @@ var_opt_union<T,S,A> var_opt_union<T,S,A>::deserialize(const void* bytes, size_t
212
202
  ptr += copy_from_mem(ptr, outer_tau_denom);
213
203
 
214
204
  const size_t gadget_size = size - (PREAMBLE_LONGS_NON_EMPTY << 3);
215
- var_opt_sketch<T,S,A> gadget = var_opt_sketch<T,S,A>::deserialize(ptr, gadget_size, sd, allocator);
205
+ var_opt_sketch<T, A> gadget = var_opt_sketch<T, A>::deserialize(ptr, gadget_size, sd, allocator);
216
206
 
217
- return var_opt_union<T,S,A>(items_seen, outer_tau_numer, outer_tau_denom, max_k, std::move(gadget));
207
+ return var_opt_union(items_seen, outer_tau_numer, outer_tau_denom, max_k, std::move(gadget));
218
208
  }
219
209
 
220
- template<typename T, typename S, typename A>
210
+ template<typename T, typename A>
221
211
  template<typename SerDe>
222
- size_t var_opt_union<T,S,A>::get_serialized_size_bytes(const SerDe& sd) const {
212
+ size_t var_opt_union<T, A>::get_serialized_size_bytes(const SerDe& sd) const {
223
213
  if (n_ == 0) {
224
214
  return PREAMBLE_LONGS_EMPTY << 3;
225
215
  } else {
@@ -227,9 +217,9 @@ size_t var_opt_union<T,S,A>::get_serialized_size_bytes(const SerDe& sd) const {
227
217
  }
228
218
  }
229
219
 
230
- template<typename T, typename S, typename A>
220
+ template<typename T, typename A>
231
221
  template<typename SerDe>
232
- void var_opt_union<T,S,A>::serialize(std::ostream& os, const SerDe& sd) const {
222
+ void var_opt_union<T, A>::serialize(std::ostream& os, const SerDe& sd) const {
233
223
  bool empty = (n_ == 0);
234
224
 
235
225
  const uint8_t serialization_version(SER_VER);
@@ -259,9 +249,9 @@ void var_opt_union<T,S,A>::serialize(std::ostream& os, const SerDe& sd) const {
259
249
  }
260
250
  }
261
251
 
262
- template<typename T, typename S, typename A>
252
+ template<typename T, typename A>
263
253
  template<typename SerDe>
264
- std::vector<uint8_t, AllocU8<A>> var_opt_union<T,S,A>::serialize(unsigned header_size_bytes, const SerDe& sd) const {
254
+ std::vector<uint8_t, AllocU8<A>> var_opt_union<T, A>::serialize(unsigned header_size_bytes, const SerDe& sd) const {
265
255
  const size_t size = header_size_bytes + get_serialized_size_bytes(sd);
266
256
  std::vector<uint8_t, AllocU8<A>> bytes(size, 0, gadget_.allocator_);
267
257
  uint8_t* ptr = bytes.data() + header_size_bytes;
@@ -301,16 +291,16 @@ std::vector<uint8_t, AllocU8<A>> var_opt_union<T,S,A>::serialize(unsigned header
301
291
  return bytes;
302
292
  }
303
293
 
304
- template<typename T, typename S, typename A>
305
- void var_opt_union<T,S,A>::reset() {
294
+ template<typename T, typename A>
295
+ void var_opt_union<T, A>::reset() {
306
296
  n_ = 0;
307
297
  outer_tau_numer_ = 0.0;
308
298
  outer_tau_denom_ = 0;
309
299
  gadget_.reset();
310
300
  }
311
301
 
312
- template<typename T, typename S, typename A>
313
- string<A> var_opt_union<T,S,A>::to_string() const {
302
+ template<typename T, typename A>
303
+ string<A> var_opt_union<T, A>::to_string() const {
314
304
  // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
315
305
  // The stream does not support passing an allocator instance, and alternatives are complicated.
316
306
  std::ostringstream os;
@@ -323,20 +313,20 @@ string<A> var_opt_union<T,S,A>::to_string() const {
323
313
  return string<A>(os.str().c_str(), gadget_.allocator_);
324
314
  }
325
315
 
326
- template<typename T, typename S, typename A>
327
- void var_opt_union<T,S,A>::update(const var_opt_sketch<T,S,A>& sk) {
316
+ template<typename T, typename A>
317
+ void var_opt_union<T, A>::update(const var_opt_sketch<T, A>& sk) {
328
318
  merge_items(sk);
329
319
  resolve_tau(sk);
330
320
  }
331
321
 
332
- template<typename T, typename S, typename A>
333
- void var_opt_union<T,S,A>::update(var_opt_sketch<T,S,A>&& sk) {
322
+ template<typename T, typename A>
323
+ void var_opt_union<T, A>::update(var_opt_sketch<T, A>&& sk) {
334
324
  merge_items(std::move(sk));
335
325
  resolve_tau(sk); // don't need items, so ok even if they've been moved out
336
326
  }
337
327
 
338
- template<typename T, typename S, typename A>
339
- double var_opt_union<T,S,A>::get_outer_tau() const {
328
+ template<typename T, typename A>
329
+ double var_opt_union<T, A>::get_outer_tau() const {
340
330
  if (outer_tau_denom_ == 0) {
341
331
  return 0.0;
342
332
  } else {
@@ -344,8 +334,8 @@ double var_opt_union<T,S,A>::get_outer_tau() const {
344
334
  }
345
335
  }
346
336
 
347
- template<typename T, typename S, typename A>
348
- void var_opt_union<T,S,A>::merge_items(const var_opt_sketch<T,S,A>& sketch) {
337
+ template<typename T, typename A>
338
+ void var_opt_union<T, A>::merge_items(const var_opt_sketch<T, A>& sketch) {
349
339
  if (sketch.n_ == 0) {
350
340
  return;
351
341
  }
@@ -353,8 +343,8 @@ void var_opt_union<T,S,A>::merge_items(const var_opt_sketch<T,S,A>& sketch) {
353
343
  n_ += sketch.n_;
354
344
 
355
345
  // H region const_iterator
356
- typename var_opt_sketch<T,S,A>::const_iterator h_itr(sketch, false, false);
357
- typename var_opt_sketch<T,S,A>::const_iterator h_end(sketch, true, false);
346
+ typename var_opt_sketch<T, A>::const_iterator h_itr(sketch, false, false);
347
+ typename var_opt_sketch<T, A>::const_iterator h_end(sketch, true, false);
358
348
  while (h_itr != h_end) {
359
349
  std::pair<const T&, const double> sample = *h_itr;
360
350
  gadget_.update(sample.first, sample.second, false);
@@ -362,8 +352,8 @@ void var_opt_union<T,S,A>::merge_items(const var_opt_sketch<T,S,A>& sketch) {
362
352
  }
363
353
 
364
354
  // Weight-correcting R region iterator (const_iterator doesn't do the correction)
365
- typename var_opt_sketch<T,S,A>::iterator r_itr(sketch, false, true);
366
- typename var_opt_sketch<T,S,A>::iterator r_end(sketch, true, true);
355
+ typename var_opt_sketch<T, A>::iterator r_itr(sketch, false, true);
356
+ typename var_opt_sketch<T, A>::iterator r_end(sketch, true, true);
367
357
  while (r_itr != r_end) {
368
358
  std::pair<const T&, const double> sample = *r_itr;
369
359
  gadget_.update(sample.first, sample.second, true);
@@ -371,8 +361,8 @@ void var_opt_union<T,S,A>::merge_items(const var_opt_sketch<T,S,A>& sketch) {
371
361
  }
372
362
  }
373
363
 
374
- template<typename T, typename S, typename A>
375
- void var_opt_union<T,S,A>::merge_items(var_opt_sketch<T,S,A>&& sketch) {
364
+ template<typename T, typename A>
365
+ void var_opt_union<T, A>::merge_items(var_opt_sketch<T, A>&& sketch) {
376
366
  if (sketch.n_ == 0) {
377
367
  return;
378
368
  }
@@ -380,8 +370,8 @@ void var_opt_union<T,S,A>::merge_items(var_opt_sketch<T,S,A>&& sketch) {
380
370
  n_ += sketch.n_;
381
371
 
382
372
  // H region iterator
383
- typename var_opt_sketch<T,S,A>::iterator h_itr(sketch, false, false);
384
- typename var_opt_sketch<T,S,A>::iterator h_end(sketch, true, false);
373
+ typename var_opt_sketch<T, A>::iterator h_itr(sketch, false, false);
374
+ typename var_opt_sketch<T, A>::iterator h_end(sketch, true, false);
385
375
  while (h_itr != h_end) {
386
376
  std::pair<T&, double> sample = *h_itr;
387
377
  gadget_.update(std::move(sample.first), sample.second, false);
@@ -389,8 +379,8 @@ void var_opt_union<T,S,A>::merge_items(var_opt_sketch<T,S,A>&& sketch) {
389
379
  }
390
380
 
391
381
  // Weight-correcting R region iterator
392
- typename var_opt_sketch<T,S,A>::iterator r_itr(sketch, false, true);
393
- typename var_opt_sketch<T,S,A>::iterator r_end(sketch, true, true);
382
+ typename var_opt_sketch<T, A>::iterator r_itr(sketch, false, true);
383
+ typename var_opt_sketch<T, A>::iterator r_end(sketch, true, true);
394
384
  while (r_itr != r_end) {
395
385
  std::pair<T&, double> sample = *r_itr;
396
386
  gadget_.update(std::move(sample.first), sample.second, true);
@@ -398,8 +388,8 @@ void var_opt_union<T,S,A>::merge_items(var_opt_sketch<T,S,A>&& sketch) {
398
388
  }
399
389
  }
400
390
 
401
- template<typename T, typename S, typename A>
402
- void var_opt_union<T,S,A>::resolve_tau(const var_opt_sketch<T,S,A>& sketch) {
391
+ template<typename T, typename A>
392
+ void var_opt_union<T, A>::resolve_tau(const var_opt_sketch<T, A>& sketch) {
403
393
  if (sketch.r_ > 0) {
404
394
  const double sketch_tau = sketch.get_tau();
405
395
  const double outer_tau = get_outer_tau();
@@ -425,8 +415,8 @@ void var_opt_union<T,S,A>::resolve_tau(const var_opt_sketch<T,S,A>& sketch) {
425
415
  }
426
416
  }
427
417
 
428
- template<typename T, typename S, typename A>
429
- var_opt_sketch<T,S,A> var_opt_union<T,S,A>::get_result() const {
418
+ template<typename T, typename A>
419
+ var_opt_sketch<T, A> var_opt_union<T, A>::get_result() const {
430
420
  // If no marked items in H, gadget is already valid mathematically. We can return what is
431
421
  // basically just a copy of the gadget.
432
422
  if (gadget_.num_marks_in_h_ == 0) {
@@ -435,7 +425,7 @@ var_opt_sketch<T,S,A> var_opt_union<T,S,A>::get_result() const {
435
425
  // Copy of gadget. This may produce needless copying in the
436
426
  // pseudo-exact case below, but should simplify the code without
437
427
  // needing to make the gadget a pointer
438
- var_opt_sketch<T,S,A> gcopy(gadget_, false, n_);
428
+ var_opt_sketch<T, A> gcopy(gadget_, false, n_);
439
429
 
440
430
  // At this point, we know that marked items are present in H. So:
441
431
  // 1. Result will necessarily be in estimation mode
@@ -456,15 +446,15 @@ var_opt_sketch<T,S,A> var_opt_union<T,S,A>::get_result() const {
456
446
  *
457
447
  * @return A shallow copy of the gadget as valid varopt sketch
458
448
  */
459
- template<typename T, typename S, typename A>
460
- var_opt_sketch<T,S,A> var_opt_union<T,S,A>::simple_gadget_coercer() const {
449
+ template<typename T, typename A>
450
+ var_opt_sketch<T, A> var_opt_union<T, A>::simple_gadget_coercer() const {
461
451
  if (gadget_.num_marks_in_h_ != 0) throw std::logic_error("simple gadget coercer only applies if no marks");
462
- return var_opt_sketch<T,S,A>(gadget_, true, n_);
452
+ return var_opt_sketch<T, A>(gadget_, true, n_);
463
453
  }
464
454
 
465
455
  // this is a condition checked in detect_and_handle_subcase_of_pseudo_exact()
466
- template<typename T, typename S, typename A>
467
- bool var_opt_union<T,S,A>::there_exist_unmarked_h_items_lighter_than_target(double threshold) const {
456
+ template<typename T, typename A>
457
+ bool var_opt_union<T, A>::there_exist_unmarked_h_items_lighter_than_target(double threshold) const {
468
458
  for (uint32_t i = 0; i < gadget_.h_; ++i) {
469
459
  if ((gadget_.weights_[i] < threshold) && !gadget_.marks_[i]) {
470
460
  return true;
@@ -473,8 +463,8 @@ bool var_opt_union<T,S,A>::there_exist_unmarked_h_items_lighter_than_target(doub
473
463
  return false;
474
464
  }
475
465
 
476
- template<typename T, typename S, typename A>
477
- bool var_opt_union<T,S,A>::detect_and_handle_subcase_of_pseudo_exact(var_opt_sketch<T,S,A>& sk) const {
466
+ template<typename T, typename A>
467
+ bool var_opt_union<T, A>::detect_and_handle_subcase_of_pseudo_exact(var_opt_sketch<T, A>& sk) const {
478
468
  // gadget is seemingly exact
479
469
  const bool condition1 = gadget_.r_ == 0;
480
470
 
@@ -510,8 +500,8 @@ bool var_opt_union<T,S,A>::detect_and_handle_subcase_of_pseudo_exact(var_opt_ske
510
500
  *
511
501
  * @param sk Copy of the gadget, modified with marked items moved to the reservoir
512
502
  */
513
- template<typename T, typename S, typename A>
514
- void var_opt_union<T,S,A>::mark_moving_gadget_coercer(var_opt_sketch<T,S,A>& sk) const {
503
+ template<typename T, typename A>
504
+ void var_opt_union<T, A>::mark_moving_gadget_coercer(var_opt_sketch<T, A>& sk) const {
515
505
  const uint32_t result_k = gadget_.h_ + gadget_.r_;
516
506
 
517
507
  uint32_t result_h = 0;
@@ -583,8 +573,8 @@ void var_opt_union<T,S,A>::mark_moving_gadget_coercer(var_opt_sketch<T,S,A>& sk)
583
573
  }
584
574
 
585
575
  // this is basically a continuation of get_result(), but modifying the input gadget copy
586
- template<typename T, typename S, typename A>
587
- void var_opt_union<T,S,A>::migrate_marked_items_by_decreasing_k(var_opt_sketch<T,S,A>& gcopy) const {
576
+ template<typename T, typename A>
577
+ void var_opt_union<T, A>::migrate_marked_items_by_decreasing_k(var_opt_sketch<T, A>& gcopy) const {
588
578
  const uint32_t r_count = gcopy.r_;
589
579
  const uint32_t h_count = gcopy.h_;
590
580
  const uint32_t k = gcopy.k_;
@@ -616,8 +606,8 @@ void var_opt_union<T,S,A>::migrate_marked_items_by_decreasing_k(var_opt_sketch<T
616
606
  gcopy.strip_marks();
617
607
  }
618
608
 
619
- template<typename T, typename S, typename A>
620
- void var_opt_union<T,S,A>::check_preamble_longs(uint8_t preamble_longs, uint8_t flags) {
609
+ template<typename T, typename A>
610
+ void var_opt_union<T, A>::check_preamble_longs(uint8_t preamble_longs, uint8_t flags) {
621
611
  bool is_empty(flags & EMPTY_FLAG_MASK);
622
612
 
623
613
  if (is_empty) {
@@ -635,8 +625,8 @@ void var_opt_union<T,S,A>::check_preamble_longs(uint8_t preamble_longs, uint8_t
635
625
  }
636
626
  }
637
627
 
638
- template<typename T, typename S, typename A>
639
- void var_opt_union<T,S,A>::check_family_and_serialization_version(uint8_t family_id, uint8_t ser_ver) {
628
+ template<typename T, typename A>
629
+ void var_opt_union<T, A>::check_family_and_serialization_version(uint8_t family_id, uint8_t ser_ver) {
640
630
  if (family_id == FAMILY_ID) {
641
631
  if (ser_ver != SER_VER) {
642
632
  throw std::invalid_argument("Possible corruption: VarOpt Union serialization version must be "
@@ -17,7 +17,7 @@
17
17
 
18
18
  add_executable(sampling_test)
19
19
 
20
- target_link_libraries(sampling_test sampling common_test)
20
+ target_link_libraries(sampling_test sampling common_test_lib)
21
21
 
22
22
  set_target_properties(sampling_test PROPERTIES
23
23
  CXX_STANDARD 11
@@ -28,8 +28,8 @@
28
28
 
29
29
  namespace datasketches {
30
30
 
31
- using var_opt_test_sketch = var_opt_sketch<test_type, test_type_serde, test_allocator<test_type>>;
32
- using var_opt_test_union = var_opt_union<test_type, test_type_serde, test_allocator<test_type>>;
31
+ using var_opt_test_sketch = var_opt_sketch<test_type, test_allocator<test_type>>;
32
+ using var_opt_test_union = var_opt_union<test_type, test_allocator<test_type>>;
33
33
  using alloc = test_allocator<test_type>;
34
34
 
35
35
  TEST_CASE("varopt allocation test", "[var_opt_sketch]") {
@@ -38,19 +38,19 @@ TEST_CASE("varopt allocation test", "[var_opt_sketch]") {
38
38
  {
39
39
  var_opt_test_sketch sk1(10, var_opt_test_sketch::DEFAULT_RESIZE_FACTOR, 0);
40
40
  for (int i = 0; i < 100; ++i) sk1.update(i);
41
- auto bytes1 = sk1.serialize();
41
+ auto bytes1 = sk1.serialize(0, test_type_serde());
42
42
  auto sk2 = var_opt_test_sketch::deserialize(bytes1.data(), bytes1.size(), test_type_serde(), 0);
43
43
 
44
44
  std::stringstream ss;
45
- sk1.serialize(ss);
46
- auto sk3 = var_opt_test_sketch::deserialize(ss, alloc(0));
45
+ sk1.serialize(ss, test_type_serde());
46
+ auto sk3 = var_opt_test_sketch::deserialize(ss, test_type_serde(), alloc(0));
47
47
 
48
48
  var_opt_test_union u1(10, 0);
49
49
  u1.update(sk1);
50
50
  u1.update(sk2);
51
51
  u1.update(sk3);
52
52
 
53
- auto bytes2 = u1.serialize();
53
+ auto bytes2 = u1.serialize(0, test_type_serde());
54
54
  auto u2 = var_opt_test_union::deserialize(bytes2.data(), bytes2.size(), test_type_serde(), 0);
55
55
  }
56
56
  REQUIRE(test_allocator_total_bytes == 0);
@@ -47,8 +47,8 @@ static var_opt_sketch<int> create_unweighted_sketch(uint32_t k, uint64_t n) {
47
47
  return sk;
48
48
  }
49
49
 
50
- template<typename T, typename S, typename A>
51
- static void check_if_equal(var_opt_sketch<T,S,A>& sk1, var_opt_sketch<T,S,A>& sk2) {
50
+ template<typename T, typename A>
51
+ static void check_if_equal(var_opt_sketch<T, A>& sk1, var_opt_sketch<T, A>& sk2) {
52
52
  REQUIRE(sk1.get_k() == sk2.get_k());
53
53
  REQUIRE(sk1.get_n() == sk2.get_n());
54
54
  REQUIRE(sk1.get_num_samples() == sk2.get_num_samples());
@@ -49,8 +49,8 @@ static var_opt_sketch<int> create_unweighted_sketch(uint32_t k, uint64_t n) {
49
49
 
50
50
  // if exact_compare = false, checks for equivalence -- specific R region values may differ but
51
51
  // R region weights must match
52
- template<typename T, typename S, typename A>
53
- static void check_if_equal(var_opt_sketch<T,S,A>& sk1, var_opt_sketch<T,S,A>& sk2, bool exact_compare = true) {
52
+ template<typename T, typename A>
53
+ static void check_if_equal(var_opt_sketch<T, A>& sk1, var_opt_sketch<T, A>& sk2, bool exact_compare = true) {
54
54
  REQUIRE(sk1.get_k() == sk2.get_k());
55
55
  REQUIRE(sk1.get_n() == sk2.get_n());
56
56
  REQUIRE(sk1.get_num_samples() == sk2.get_num_samples());
@@ -78,8 +78,8 @@ static void check_if_equal(var_opt_sketch<T,S,A>& sk1, var_opt_sketch<T,S,A>& sk
78
78
  // ensure that the resulting binary images are compatible.
79
79
  // if exact_compare = false, checks for equivalence -- specific R region values may differ but
80
80
  // R region weights must match
81
- template<typename T, typename S, typename A>
82
- static void compare_serialization_deserialization(var_opt_union<T,S,A>& vo_union, bool exact_compare = true) {
81
+ template<typename T, typename A>
82
+ static void compare_serialization_deserialization(var_opt_union<T,A>& vo_union, bool exact_compare = true) {
83
83
  std::vector<uint8_t> bytes = vo_union.serialize();
84
84
 
85
85
  var_opt_union<T> u_from_bytes = var_opt_union<T>::deserialize(bytes.data(), bytes.size());
@@ -22,6 +22,8 @@ import os
22
22
  import sys
23
23
  import platform
24
24
  import subprocess
25
+ import re
26
+ from datetime import datetime, timezone
25
27
 
26
28
  from setuptools import setup, find_packages, Extension
27
29
  from setuptools.command.build_ext import build_ext
@@ -78,9 +80,19 @@ class CMakeBuild(build_ext):
78
80
  cwd=self.build_temp, env=env)
79
81
  print() # add an empty line to pretty print
80
82
 
83
+ # Read and parse the version format
84
+ # @DT@ -> datestamp
85
+ # @HHMM@ -> .devHHMM to indicate development version
86
+ # Releases should have a fixed version with no @ variables
87
+ with open('version.cfg.in', 'r') as file:
88
+ ds_version = file.read().rstrip()
89
+ dt = datetime.now(timezone.utc)
90
+ ds_version = re.sub('@DT@', dt.strftime('%Y%m%d'), ds_version)
91
+ ds_version = re.sub('@HHMM@', 'dev' + dt.strftime('%H%M'), ds_version)
92
+
81
93
  setup(
82
94
  name='datasketches',
83
- version='3.5.1',
95
+ version=ds_version,
84
96
  author='Apache Software Foundation',
85
97
  author_email='dev@datasketches.apache.org',
86
98
  description='The Apache DataSketches Library for Python',
@@ -88,7 +100,7 @@ setup(
88
100
  url='http://datasketches.apache.org',
89
101
  long_description=open('python/README.md').read(),
90
102
  long_description_content_type='text/markdown',
91
- packages=find_packages(where='python',exclude=['src','*tests*']), # src not needed if only the.so
103
+ packages=find_packages(where='python',exclude=['src','*tests*']), # src not needed if only the .so
92
104
  package_dir={'':'python'},
93
105
  # may need to add all source paths for sdist packages w/o MANIFEST.in
94
106
  ext_modules=[CMakeExtension('datasketches')],