datasketches 0.2.7 → 0.3.0
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/ext/datasketches/kll_wrapper.cpp +20 -20
- data/ext/datasketches/theta_wrapper.cpp +2 -2
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +9 -1
- data/vendor/datasketches-cpp/MANIFEST.in +21 -2
- data/vendor/datasketches-cpp/common/CMakeLists.txt +5 -2
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +10 -0
- data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp +6 -6
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +1 -0
- data/vendor/datasketches-cpp/common/include/{quantile_sketch_sorted_view.hpp → quantiles_sorted_view.hpp} +60 -25
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +125 -0
- data/vendor/datasketches-cpp/common/include/version.hpp.in +36 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +25 -6
- data/vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp +459 -0
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +28 -44
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +70 -78
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +11 -4
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +16 -9
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +54 -41
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +2 -2
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -32
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +176 -233
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +337 -395
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +26 -26
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +196 -232
- data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +41 -31
- data/vendor/datasketches-cpp/pyproject.toml +17 -12
- data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
- data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +104 -0
- data/vendor/datasketches-cpp/python/datasketches/__init__.py +22 -0
- data/vendor/datasketches-cpp/python/include/py_serde.hpp +113 -0
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +31 -24
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +18 -0
- data/vendor/datasketches-cpp/python/src/__init__.py +17 -1
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +9 -3
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +18 -54
- data/vendor/datasketches-cpp/python/src/py_serde.cpp +111 -0
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +17 -53
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +17 -55
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +62 -67
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +47 -14
- data/vendor/datasketches-cpp/python/tests/__init__.py +16 -0
- data/vendor/datasketches-cpp/python/tests/req_test.py +1 -1
- data/vendor/datasketches-cpp/python/tests/vo_test.py +25 -1
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +135 -180
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +205 -210
- data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +19 -18
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +240 -232
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +15 -9
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +35 -19
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +126 -147
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +265 -245
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +26 -26
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +116 -103
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +22 -46
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +180 -207
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +18 -39
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +75 -85
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +6 -6
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +2 -2
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +4 -4
- data/vendor/datasketches-cpp/setup.py +14 -2
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +15 -25
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +0 -9
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -5
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +2 -1
- data/vendor/datasketches-cpp/tox.ini +26 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +36 -12
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +16 -4
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +2 -1
- data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +299 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +26 -0
- data/vendor/datasketches-cpp/version.cfg.in +1 -0
- metadata +14 -5
- data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +0 -91
|
@@ -51,7 +51,7 @@ struct subset_summary {
|
|
|
51
51
|
double total_sketch_weight;
|
|
52
52
|
};
|
|
53
53
|
|
|
54
|
-
template <typename T, typename
|
|
54
|
+
template <typename T, typename A> class var_opt_union; // forward declaration
|
|
55
55
|
|
|
56
56
|
namespace var_opt_constants {
|
|
57
57
|
const resize_factor DEFAULT_RESIZE_FACTOR = resize_factor::X8;
|
|
@@ -60,7 +60,6 @@ namespace var_opt_constants {
|
|
|
60
60
|
|
|
61
61
|
template<
|
|
62
62
|
typename T,
|
|
63
|
-
typename S = serde<T>, // deprecated, to be removed in the next major version
|
|
64
63
|
typename A = std::allocator<T>
|
|
65
64
|
>
|
|
66
65
|
class var_opt_sketch {
|
|
@@ -142,7 +141,7 @@ class var_opt_sketch {
|
|
|
142
141
|
* @param instance of a SerDe
|
|
143
142
|
* @return size in bytes needed to serialize this sketch
|
|
144
143
|
*/
|
|
145
|
-
template<typename TT = T, typename SerDe =
|
|
144
|
+
template<typename TT = T, typename SerDe = serde<T>, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
|
|
146
145
|
inline size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
|
|
147
146
|
|
|
148
147
|
/**
|
|
@@ -151,7 +150,7 @@ class var_opt_sketch {
|
|
|
151
150
|
* @param instance of a SerDe
|
|
152
151
|
* @return size in bytes needed to serialize this sketch
|
|
153
152
|
*/
|
|
154
|
-
template<typename TT = T, typename SerDe =
|
|
153
|
+
template<typename TT = T, typename SerDe = serde<T>, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
|
|
155
154
|
inline size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
|
|
156
155
|
|
|
157
156
|
// This is a convenience alias for users
|
|
@@ -166,7 +165,7 @@ class var_opt_sketch {
|
|
|
166
165
|
* @param header_size_bytes space to reserve in front of the sketch
|
|
167
166
|
* @param instance of a SerDe
|
|
168
167
|
*/
|
|
169
|
-
template<typename SerDe =
|
|
168
|
+
template<typename SerDe = serde<T>>
|
|
170
169
|
vector_bytes serialize(unsigned header_size_bytes = 0, const SerDe& sd = SerDe()) const;
|
|
171
170
|
|
|
172
171
|
/**
|
|
@@ -174,19 +173,9 @@ class var_opt_sketch {
|
|
|
174
173
|
* @param os output stream
|
|
175
174
|
* @param instance of a SerDe
|
|
176
175
|
*/
|
|
177
|
-
template<typename SerDe =
|
|
176
|
+
template<typename SerDe = serde<T>>
|
|
178
177
|
void serialize(std::ostream& os, const SerDe& sd = SerDe()) const;
|
|
179
178
|
|
|
180
|
-
/**
|
|
181
|
-
* This method deserializes a sketch from a given stream.
|
|
182
|
-
* @param is input stream
|
|
183
|
-
* @param instance of an Allocator
|
|
184
|
-
* @return an instance of a sketch
|
|
185
|
-
*
|
|
186
|
-
* Deprecated, to be removed in the next major version
|
|
187
|
-
*/
|
|
188
|
-
static var_opt_sketch deserialize(std::istream& is, const A& allocator = A());
|
|
189
|
-
|
|
190
179
|
/**
|
|
191
180
|
* This method deserializes a sketch from a given stream.
|
|
192
181
|
* @param is input stream
|
|
@@ -194,20 +183,9 @@ class var_opt_sketch {
|
|
|
194
183
|
* @param instance of an Allocator
|
|
195
184
|
* @return an instance of a sketch
|
|
196
185
|
*/
|
|
197
|
-
template<typename SerDe =
|
|
186
|
+
template<typename SerDe = serde<T>>
|
|
198
187
|
static var_opt_sketch deserialize(std::istream& is, const SerDe& sd = SerDe(), const A& allocator = A());
|
|
199
188
|
|
|
200
|
-
/**
|
|
201
|
-
* This method deserializes a sketch from a given array of bytes.
|
|
202
|
-
* @param bytes pointer to the array of bytes
|
|
203
|
-
* @param size the size of the array
|
|
204
|
-
* @param instance of an Allocator
|
|
205
|
-
* @return an instance of a sketch
|
|
206
|
-
*
|
|
207
|
-
* Deprecated, to be removed in the next major version
|
|
208
|
-
*/
|
|
209
|
-
static var_opt_sketch deserialize(const void* bytes, size_t size, const A& allocator = A());
|
|
210
|
-
|
|
211
189
|
/**
|
|
212
190
|
* This method deserializes a sketch from a given array of bytes.
|
|
213
191
|
* @param bytes pointer to the array of bytes
|
|
@@ -216,7 +194,7 @@ class var_opt_sketch {
|
|
|
216
194
|
* @param instance of an Allocator
|
|
217
195
|
* @return an instance of a sketch
|
|
218
196
|
*/
|
|
219
|
-
template<typename SerDe =
|
|
197
|
+
template<typename SerDe = serde<T>>
|
|
220
198
|
static var_opt_sketch deserialize(const void* bytes, size_t size, const SerDe& sd = SerDe(), const A& allocator = A());
|
|
221
199
|
|
|
222
200
|
/**
|
|
@@ -303,10 +281,9 @@ class var_opt_sketch {
|
|
|
303
281
|
std::unique_ptr<double, weights_deleter> weights, uint32_t num_marks_in_h,
|
|
304
282
|
std::unique_ptr<bool, marks_deleter> marks, const A& allocator);
|
|
305
283
|
|
|
306
|
-
friend class var_opt_union<T,
|
|
284
|
+
friend class var_opt_union<T, A>;
|
|
307
285
|
var_opt_sketch(const var_opt_sketch& other, bool as_sketch, uint64_t adjusted_n);
|
|
308
|
-
|
|
309
|
-
|
|
286
|
+
|
|
310
287
|
string<A> items_to_string(bool print_gap) const;
|
|
311
288
|
|
|
312
289
|
// internal-use-only update
|
|
@@ -368,8 +345,8 @@ class var_opt_sketch {
|
|
|
368
345
|
class iterator;
|
|
369
346
|
};
|
|
370
347
|
|
|
371
|
-
template<typename T, typename
|
|
372
|
-
class var_opt_sketch<T,
|
|
348
|
+
template<typename T, typename A>
|
|
349
|
+
class var_opt_sketch<T, A>::const_iterator : public std::iterator<std::input_iterator_tag, T> {
|
|
373
350
|
public:
|
|
374
351
|
const_iterator(const const_iterator& other);
|
|
375
352
|
const_iterator& operator++();
|
|
@@ -379,29 +356,28 @@ public:
|
|
|
379
356
|
const std::pair<const T&, const double> operator*() const;
|
|
380
357
|
|
|
381
358
|
private:
|
|
382
|
-
friend class var_opt_sketch<T,
|
|
383
|
-
friend class var_opt_union<T,
|
|
359
|
+
friend class var_opt_sketch<T, A>;
|
|
360
|
+
friend class var_opt_union<T, A>;
|
|
384
361
|
|
|
385
362
|
// default iterator over full sketch
|
|
386
|
-
const_iterator(const var_opt_sketch<T,
|
|
363
|
+
const_iterator(const var_opt_sketch<T, A>& sk, bool is_end);
|
|
387
364
|
|
|
388
365
|
// iterates over only one of the H or R region, optionally applying weight correction
|
|
389
366
|
// to R region (can correct for numerical precision issues)
|
|
390
|
-
const_iterator(const var_opt_sketch<T,
|
|
367
|
+
const_iterator(const var_opt_sketch<T, A>& sk, bool is_end, bool use_r_region);
|
|
391
368
|
|
|
392
369
|
bool get_mark() const;
|
|
393
370
|
|
|
394
|
-
const var_opt_sketch<T,
|
|
371
|
+
const var_opt_sketch<T, A>* sk_;
|
|
395
372
|
double cum_r_weight_; // used for weight correction
|
|
396
373
|
double r_item_wt_;
|
|
397
374
|
size_t idx_;
|
|
398
375
|
const size_t final_idx_;
|
|
399
|
-
// bool weight_correction_;
|
|
400
376
|
};
|
|
401
377
|
|
|
402
378
|
// non-const iterator for internal use
|
|
403
|
-
template<typename T, typename
|
|
404
|
-
class var_opt_sketch<T,
|
|
379
|
+
template<typename T, typename A>
|
|
380
|
+
class var_opt_sketch<T, A>::iterator : public std::iterator<std::input_iterator_tag, T> {
|
|
405
381
|
public:
|
|
406
382
|
iterator(const iterator& other);
|
|
407
383
|
iterator& operator++();
|
|
@@ -411,16 +387,16 @@ public:
|
|
|
411
387
|
std::pair<T&, double> operator*();
|
|
412
388
|
|
|
413
389
|
private:
|
|
414
|
-
friend class var_opt_sketch<T,
|
|
415
|
-
friend class var_opt_union<T,
|
|
390
|
+
friend class var_opt_sketch<T, A>;
|
|
391
|
+
friend class var_opt_union<T, A>;
|
|
416
392
|
|
|
417
393
|
// iterates over only one of the H or R region, applying weight correction
|
|
418
394
|
// if iterating over R region (can correct for numerical precision issues)
|
|
419
|
-
iterator(const var_opt_sketch<T,
|
|
395
|
+
iterator(const var_opt_sketch<T, A>& sk, bool is_end, bool use_r_region);
|
|
420
396
|
|
|
421
397
|
bool get_mark() const;
|
|
422
398
|
|
|
423
|
-
const var_opt_sketch<T,
|
|
399
|
+
const var_opt_sketch<T, A>* sk_;
|
|
424
400
|
double cum_r_weight_; // used for weight correction
|
|
425
401
|
double r_item_wt_;
|
|
426
402
|
size_t idx_;
|