datasketches 0.2.7 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/ext/datasketches/kll_wrapper.cpp +20 -20
- data/ext/datasketches/theta_wrapper.cpp +2 -2
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +9 -1
- data/vendor/datasketches-cpp/MANIFEST.in +21 -2
- data/vendor/datasketches-cpp/common/CMakeLists.txt +5 -2
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +10 -0
- data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp +6 -6
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +1 -0
- data/vendor/datasketches-cpp/common/include/{quantile_sketch_sorted_view.hpp → quantiles_sorted_view.hpp} +60 -25
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +125 -0
- data/vendor/datasketches-cpp/common/include/version.hpp.in +36 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +25 -6
- data/vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp +459 -0
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +28 -44
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +70 -78
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +11 -4
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +16 -9
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +54 -41
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +2 -2
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -32
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +176 -233
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +337 -395
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +26 -26
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +196 -232
- data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +41 -31
- data/vendor/datasketches-cpp/pyproject.toml +17 -12
- data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
- data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +104 -0
- data/vendor/datasketches-cpp/python/datasketches/__init__.py +22 -0
- data/vendor/datasketches-cpp/python/include/py_serde.hpp +113 -0
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +31 -24
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +18 -0
- data/vendor/datasketches-cpp/python/src/__init__.py +17 -1
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +9 -3
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +18 -54
- data/vendor/datasketches-cpp/python/src/py_serde.cpp +111 -0
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +17 -53
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +17 -55
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +62 -67
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +47 -14
- data/vendor/datasketches-cpp/python/tests/__init__.py +16 -0
- data/vendor/datasketches-cpp/python/tests/req_test.py +1 -1
- data/vendor/datasketches-cpp/python/tests/vo_test.py +25 -1
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +135 -180
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +205 -210
- data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +19 -18
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +240 -232
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +15 -9
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +35 -19
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +126 -147
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +265 -245
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +26 -26
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +116 -103
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +22 -46
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +180 -207
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +18 -39
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +75 -85
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +6 -6
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +2 -2
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +4 -4
- data/vendor/datasketches-cpp/setup.py +14 -2
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +15 -25
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +0 -9
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -5
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +2 -1
- data/vendor/datasketches-cpp/tox.ini +26 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +36 -12
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +16 -4
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +2 -1
- data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +299 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +26 -0
- data/vendor/datasketches-cpp/version.cfg.in +1 -0
- metadata +14 -5
- data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +0 -91
@@ -51,7 +51,7 @@ struct subset_summary {
|
|
51
51
|
double total_sketch_weight;
|
52
52
|
};
|
53
53
|
|
54
|
-
template <typename T, typename S, typename A> class var_opt_union; // forward declaration
|
54
|
+
template <typename T, typename A> class var_opt_union; // forward declaration
|
55
55
|
|
56
56
|
namespace var_opt_constants {
|
57
57
|
const resize_factor DEFAULT_RESIZE_FACTOR = resize_factor::X8;
|
@@ -60,7 +60,6 @@ namespace var_opt_constants {
|
|
60
60
|
|
61
61
|
template<
|
62
62
|
typename T,
|
63
|
-
typename S = serde<T>, // deprecated, to be removed in the next major version
|
64
63
|
typename A = std::allocator<T>
|
65
64
|
>
|
66
65
|
class var_opt_sketch {
|
@@ -142,7 +141,7 @@ class var_opt_sketch {
|
|
142
141
|
* @param instance of a SerDe
|
143
142
|
* @return size in bytes needed to serialize this sketch
|
144
143
|
*/
|
145
|
-
template<typename TT = T, typename SerDe = S, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
|
144
|
+
template<typename TT = T, typename SerDe = serde<T>, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
|
146
145
|
inline size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
|
147
146
|
|
148
147
|
/**
|
@@ -151,7 +150,7 @@ class var_opt_sketch {
|
|
151
150
|
* @param instance of a SerDe
|
152
151
|
* @return size in bytes needed to serialize this sketch
|
153
152
|
*/
|
154
|
-
template<typename TT = T, typename SerDe = S, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
|
153
|
+
template<typename TT = T, typename SerDe = serde<T>, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
|
155
154
|
inline size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
|
156
155
|
|
157
156
|
// This is a convenience alias for users
|
@@ -166,7 +165,7 @@ class var_opt_sketch {
|
|
166
165
|
* @param header_size_bytes space to reserve in front of the sketch
|
167
166
|
* @param instance of a SerDe
|
168
167
|
*/
|
169
|
-
template<typename SerDe = S>
|
168
|
+
template<typename SerDe = serde<T>>
|
170
169
|
vector_bytes serialize(unsigned header_size_bytes = 0, const SerDe& sd = SerDe()) const;
|
171
170
|
|
172
171
|
/**
|
@@ -174,19 +173,9 @@ class var_opt_sketch {
|
|
174
173
|
* @param os output stream
|
175
174
|
* @param instance of a SerDe
|
176
175
|
*/
|
177
|
-
template<typename SerDe = S>
|
176
|
+
template<typename SerDe = serde<T>>
|
178
177
|
void serialize(std::ostream& os, const SerDe& sd = SerDe()) const;
|
179
178
|
|
180
|
-
/**
|
181
|
-
* This method deserializes a sketch from a given stream.
|
182
|
-
* @param is input stream
|
183
|
-
* @param instance of an Allocator
|
184
|
-
* @return an instance of a sketch
|
185
|
-
*
|
186
|
-
* Deprecated, to be removed in the next major version
|
187
|
-
*/
|
188
|
-
static var_opt_sketch deserialize(std::istream& is, const A& allocator = A());
|
189
|
-
|
190
179
|
/**
|
191
180
|
* This method deserializes a sketch from a given stream.
|
192
181
|
* @param is input stream
|
@@ -194,20 +183,9 @@ class var_opt_sketch {
|
|
194
183
|
* @param instance of an Allocator
|
195
184
|
* @return an instance of a sketch
|
196
185
|
*/
|
197
|
-
template<typename SerDe = S>
|
186
|
+
template<typename SerDe = serde<T>>
|
198
187
|
static var_opt_sketch deserialize(std::istream& is, const SerDe& sd = SerDe(), const A& allocator = A());
|
199
188
|
|
200
|
-
/**
|
201
|
-
* This method deserializes a sketch from a given array of bytes.
|
202
|
-
* @param bytes pointer to the array of bytes
|
203
|
-
* @param size the size of the array
|
204
|
-
* @param instance of an Allocator
|
205
|
-
* @return an instance of a sketch
|
206
|
-
*
|
207
|
-
* Deprecated, to be removed in the next major version
|
208
|
-
*/
|
209
|
-
static var_opt_sketch deserialize(const void* bytes, size_t size, const A& allocator = A());
|
210
|
-
|
211
189
|
/**
|
212
190
|
* This method deserializes a sketch from a given array of bytes.
|
213
191
|
* @param bytes pointer to the array of bytes
|
@@ -216,7 +194,7 @@ class var_opt_sketch {
|
|
216
194
|
* @param instance of an Allocator
|
217
195
|
* @return an instance of a sketch
|
218
196
|
*/
|
219
|
-
template<typename SerDe = S>
|
197
|
+
template<typename SerDe = serde<T>>
|
220
198
|
static var_opt_sketch deserialize(const void* bytes, size_t size, const SerDe& sd = SerDe(), const A& allocator = A());
|
221
199
|
|
222
200
|
/**
|
@@ -303,10 +281,9 @@ class var_opt_sketch {
|
|
303
281
|
std::unique_ptr<double, weights_deleter> weights, uint32_t num_marks_in_h,
|
304
282
|
std::unique_ptr<bool, marks_deleter> marks, const A& allocator);
|
305
283
|
|
306
|
-
friend class var_opt_union<T, S, A>;
|
284
|
+
friend class var_opt_union<T, A>;
|
307
285
|
var_opt_sketch(const var_opt_sketch& other, bool as_sketch, uint64_t adjusted_n);
|
308
|
-
|
309
|
-
|
286
|
+
|
310
287
|
string<A> items_to_string(bool print_gap) const;
|
311
288
|
|
312
289
|
// internal-use-only update
|
@@ -368,8 +345,8 @@ class var_opt_sketch {
|
|
368
345
|
class iterator;
|
369
346
|
};
|
370
347
|
|
371
|
-
template<typename T, typename S, typename A>
|
372
|
-
class var_opt_sketch<T, S, A>::const_iterator : public std::iterator<std::input_iterator_tag, T> {
|
348
|
+
template<typename T, typename A>
|
349
|
+
class var_opt_sketch<T, A>::const_iterator : public std::iterator<std::input_iterator_tag, T> {
|
373
350
|
public:
|
374
351
|
const_iterator(const const_iterator& other);
|
375
352
|
const_iterator& operator++();
|
@@ -379,29 +356,28 @@ public:
|
|
379
356
|
const std::pair<const T&, const double> operator*() const;
|
380
357
|
|
381
358
|
private:
|
382
|
-
friend class var_opt_sketch<T, S, A>;
|
383
|
-
friend class var_opt_union<T, S, A>;
|
359
|
+
friend class var_opt_sketch<T, A>;
|
360
|
+
friend class var_opt_union<T, A>;
|
384
361
|
|
385
362
|
// default iterator over full sketch
|
386
|
-
const_iterator(const var_opt_sketch<T, S, A>& sk, bool is_end);
|
363
|
+
const_iterator(const var_opt_sketch<T, A>& sk, bool is_end);
|
387
364
|
|
388
365
|
// iterates over only one of the H or R region, optionally applying weight correction
|
389
366
|
// to R region (can correct for numerical precision issues)
|
390
|
-
const_iterator(const var_opt_sketch<T, S, A>& sk, bool is_end, bool use_r_region);
|
367
|
+
const_iterator(const var_opt_sketch<T, A>& sk, bool is_end, bool use_r_region);
|
391
368
|
|
392
369
|
bool get_mark() const;
|
393
370
|
|
394
|
-
const var_opt_sketch<T, S, A>* sk_;
|
371
|
+
const var_opt_sketch<T, A>* sk_;
|
395
372
|
double cum_r_weight_; // used for weight correction
|
396
373
|
double r_item_wt_;
|
397
374
|
size_t idx_;
|
398
375
|
const size_t final_idx_;
|
399
|
-
// bool weight_correction_;
|
400
376
|
};
|
401
377
|
|
402
378
|
// non-const iterator for internal use
|
403
|
-
template<typename T, typename S, typename A>
|
404
|
-
class var_opt_sketch<T, S, A>::iterator : public std::iterator<std::input_iterator_tag, T> {
|
379
|
+
template<typename T, typename A>
|
380
|
+
class var_opt_sketch<T, A>::iterator : public std::iterator<std::input_iterator_tag, T> {
|
405
381
|
public:
|
406
382
|
iterator(const iterator& other);
|
407
383
|
iterator& operator++();
|
@@ -411,16 +387,16 @@ public:
|
|
411
387
|
std::pair<T&, double> operator*();
|
412
388
|
|
413
389
|
private:
|
414
|
-
friend class var_opt_sketch<T, S, A>;
|
415
|
-
friend class var_opt_union<T, S, A>;
|
390
|
+
friend class var_opt_sketch<T, A>;
|
391
|
+
friend class var_opt_union<T, A>;
|
416
392
|
|
417
393
|
// iterates over only one of the H or R region, applying weight correction
|
418
394
|
// if iterating over R region (can correct for numerical precision issues)
|
419
|
-
iterator(const var_opt_sketch<T, S, A>& sk, bool is_end, bool use_r_region);
|
395
|
+
iterator(const var_opt_sketch<T, A>& sk, bool is_end, bool use_r_region);
|
420
396
|
|
421
397
|
bool get_mark() const;
|
422
398
|
|
423
|
-
const var_opt_sketch<T, S, A>* sk_;
|
399
|
+
const var_opt_sketch<T, A>* sk_;
|
424
400
|
double cum_r_weight_; // used for weight correction
|
425
401
|
double r_item_wt_;
|
426
402
|
size_t idx_;
|