datasketches 0.2.7 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/ext/datasketches/kll_wrapper.cpp +20 -20
- data/ext/datasketches/theta_wrapper.cpp +2 -2
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +9 -1
- data/vendor/datasketches-cpp/MANIFEST.in +21 -2
- data/vendor/datasketches-cpp/common/CMakeLists.txt +5 -2
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +10 -0
- data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp +6 -6
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +1 -0
- data/vendor/datasketches-cpp/common/include/{quantile_sketch_sorted_view.hpp → quantiles_sorted_view.hpp} +60 -25
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +125 -0
- data/vendor/datasketches-cpp/common/include/version.hpp.in +36 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +25 -6
- data/vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp +459 -0
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +28 -44
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +70 -78
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +11 -4
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +16 -9
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +54 -41
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +2 -2
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -32
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +176 -233
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +337 -395
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +26 -26
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +196 -232
- data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +41 -31
- data/vendor/datasketches-cpp/pyproject.toml +17 -12
- data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
- data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +104 -0
- data/vendor/datasketches-cpp/python/datasketches/__init__.py +22 -0
- data/vendor/datasketches-cpp/python/include/py_serde.hpp +113 -0
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +31 -24
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +18 -0
- data/vendor/datasketches-cpp/python/src/__init__.py +17 -1
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +9 -3
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +18 -54
- data/vendor/datasketches-cpp/python/src/py_serde.cpp +111 -0
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +17 -53
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +17 -55
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +62 -67
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +47 -14
- data/vendor/datasketches-cpp/python/tests/__init__.py +16 -0
- data/vendor/datasketches-cpp/python/tests/req_test.py +1 -1
- data/vendor/datasketches-cpp/python/tests/vo_test.py +25 -1
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +135 -180
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +205 -210
- data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +19 -18
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +240 -232
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +15 -9
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +35 -19
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +126 -147
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +265 -245
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +26 -26
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +116 -103
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +22 -46
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +180 -207
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +18 -39
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +75 -85
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +6 -6
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +2 -2
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +4 -4
- data/vendor/datasketches-cpp/setup.py +14 -2
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +15 -25
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +0 -9
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -5
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +2 -1
- data/vendor/datasketches-cpp/tox.ini +26 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +36 -12
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +16 -4
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +2 -1
- data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +299 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +26 -0
- data/vendor/datasketches-cpp/version.cfg.in +1 -0
- metadata +14 -5
- data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +0 -91
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: datasketches
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.7
|
|
4
|
+
version: 0.3.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Andrew Kane
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2022-
|
|
11
|
+
date: 2022-12-09 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rice
|
|
@@ -65,12 +65,14 @@ files:
|
|
|
65
65
|
- vendor/datasketches-cpp/common/include/kolmogorov_smirnov.hpp
|
|
66
66
|
- vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp
|
|
67
67
|
- vendor/datasketches-cpp/common/include/memory_operations.hpp
|
|
68
|
-
- vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view.hpp
|
|
69
|
-
- vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp
|
|
68
|
+
- vendor/datasketches-cpp/common/include/quantiles_sorted_view.hpp
|
|
69
|
+
- vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp
|
|
70
70
|
- vendor/datasketches-cpp/common/include/serde.hpp
|
|
71
|
+
- vendor/datasketches-cpp/common/include/version.hpp.in
|
|
71
72
|
- vendor/datasketches-cpp/common/test/CMakeLists.txt
|
|
72
73
|
- vendor/datasketches-cpp/common/test/catch_runner.cpp
|
|
73
74
|
- vendor/datasketches-cpp/common/test/integration_test.cpp
|
|
75
|
+
- vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp
|
|
74
76
|
- vendor/datasketches-cpp/common/test/test_allocator.cpp
|
|
75
77
|
- vendor/datasketches-cpp/common/test/test_allocator.hpp
|
|
76
78
|
- vendor/datasketches-cpp/common/test/test_type.hpp
|
|
@@ -171,6 +173,9 @@ files:
|
|
|
171
173
|
- vendor/datasketches-cpp/pyproject.toml
|
|
172
174
|
- vendor/datasketches-cpp/python/CMakeLists.txt
|
|
173
175
|
- vendor/datasketches-cpp/python/README.md
|
|
176
|
+
- vendor/datasketches-cpp/python/datasketches/PySerDe.py
|
|
177
|
+
- vendor/datasketches-cpp/python/datasketches/__init__.py
|
|
178
|
+
- vendor/datasketches-cpp/python/include/py_serde.hpp
|
|
174
179
|
- vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb
|
|
175
180
|
- vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb
|
|
176
181
|
- vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb
|
|
@@ -184,6 +189,7 @@ files:
|
|
|
184
189
|
- vendor/datasketches-cpp/python/src/hll_wrapper.cpp
|
|
185
190
|
- vendor/datasketches-cpp/python/src/kll_wrapper.cpp
|
|
186
191
|
- vendor/datasketches-cpp/python/src/ks_wrapper.cpp
|
|
192
|
+
- vendor/datasketches-cpp/python/src/py_serde.cpp
|
|
187
193
|
- vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp
|
|
188
194
|
- vendor/datasketches-cpp/python/src/req_wrapper.cpp
|
|
189
195
|
- vendor/datasketches-cpp/python/src/theta_wrapper.cpp
|
|
@@ -283,6 +289,7 @@ files:
|
|
|
283
289
|
- vendor/datasketches-cpp/theta/test/theta_setop_test.cpp
|
|
284
290
|
- vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp
|
|
285
291
|
- vendor/datasketches-cpp/theta/test/theta_union_test.cpp
|
|
292
|
+
- vendor/datasketches-cpp/tox.ini
|
|
286
293
|
- vendor/datasketches-cpp/tuple/CMakeLists.txt
|
|
287
294
|
- vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp
|
|
288
295
|
- vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp
|
|
@@ -308,12 +315,14 @@ files:
|
|
|
308
315
|
- vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk
|
|
309
316
|
- vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk
|
|
310
317
|
- vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp
|
|
318
|
+
- vendor/datasketches-cpp/tuple/test/engagement_test.cpp
|
|
311
319
|
- vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp
|
|
312
320
|
- vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp
|
|
313
321
|
- vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp
|
|
314
322
|
- vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp
|
|
315
323
|
- vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp
|
|
316
324
|
- vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp
|
|
325
|
+
- vendor/datasketches-cpp/version.cfg.in
|
|
317
326
|
homepage: https://github.com/ankane/datasketches-ruby
|
|
318
327
|
licenses:
|
|
319
328
|
- Apache-2.0
|
|
@@ -326,7 +335,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
326
335
|
requirements:
|
|
327
336
|
- - ">="
|
|
328
337
|
- !ruby/object:Gem::Version
|
|
329
|
-
version: '2.
|
|
338
|
+
version: '2.7'
|
|
330
339
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
331
340
|
requirements:
|
|
332
341
|
- - ">="
|
|
@@ -1,91 +0,0 @@
|
|
|
1
|
-
/*
|
|
2
|
-
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
-
* or more contributor license agreements. See the NOTICE file
|
|
4
|
-
* distributed with this work for additional information
|
|
5
|
-
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
-
* to you under the Apache License, Version 2.0 (the
|
|
7
|
-
* "License"); you may not use this file except in compliance
|
|
8
|
-
* with the License. You may obtain a copy of the License at
|
|
9
|
-
*
|
|
10
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
-
*
|
|
12
|
-
* Unless required by applicable law or agreed to in writing,
|
|
13
|
-
* software distributed under the License is distributed on an
|
|
14
|
-
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
-
* KIND, either express or implied. See the License for the
|
|
16
|
-
* specific language governing permissions and limitations
|
|
17
|
-
* under the License.
|
|
18
|
-
*/
|
|
19
|
-
|
|
20
|
-
#ifndef QUANTILE_SKETCH_SORTED_VIEW_IMPL_HPP_
|
|
21
|
-
#define QUANTILE_SKETCH_SORTED_VIEW_IMPL_HPP_
|
|
22
|
-
|
|
23
|
-
#include <algorithm>
|
|
24
|
-
#include <stdexcept>
|
|
25
|
-
|
|
26
|
-
namespace datasketches {
|
|
27
|
-
|
|
28
|
-
template<typename T, typename C, typename A>
|
|
29
|
-
quantile_sketch_sorted_view<T, C, A>::quantile_sketch_sorted_view(uint32_t num, const A& allocator):
|
|
30
|
-
total_weight_(0),
|
|
31
|
-
entries_(allocator)
|
|
32
|
-
{
|
|
33
|
-
entries_.reserve(num);
|
|
34
|
-
}
|
|
35
|
-
|
|
36
|
-
template<typename T, typename C, typename A>
|
|
37
|
-
template<typename Iterator>
|
|
38
|
-
void quantile_sketch_sorted_view<T, C, A>::add(Iterator first, Iterator last, uint64_t weight) {
|
|
39
|
-
const size_t size_before = entries_.size();
|
|
40
|
-
for (auto it = first; it != last; ++it) entries_.push_back(Entry(ref_helper(*it), weight));
|
|
41
|
-
if (size_before > 0) {
|
|
42
|
-
Container tmp(entries_.get_allocator());
|
|
43
|
-
tmp.reserve(entries_.capacity());
|
|
44
|
-
std::merge(
|
|
45
|
-
entries_.begin(), entries_.begin() + size_before,
|
|
46
|
-
entries_.begin() + size_before, entries_.end(),
|
|
47
|
-
std::back_inserter(tmp), compare_pairs_by_first()
|
|
48
|
-
);
|
|
49
|
-
std::swap(tmp, entries_);
|
|
50
|
-
}
|
|
51
|
-
}
|
|
52
|
-
|
|
53
|
-
template<typename T, typename C, typename A>
|
|
54
|
-
template<bool inclusive>
|
|
55
|
-
void quantile_sketch_sorted_view<T, C, A>::convert_to_cummulative() {
|
|
56
|
-
uint64_t subtotal = 0;
|
|
57
|
-
for (auto& entry: entries_) {
|
|
58
|
-
const uint64_t new_subtotal = subtotal + entry.second;
|
|
59
|
-
entry.second = inclusive ? new_subtotal : subtotal;
|
|
60
|
-
subtotal = new_subtotal;
|
|
61
|
-
}
|
|
62
|
-
total_weight_ = subtotal;
|
|
63
|
-
}
|
|
64
|
-
|
|
65
|
-
template<typename T, typename C, typename A>
|
|
66
|
-
auto quantile_sketch_sorted_view<T, C, A>::get_quantile(double rank) const -> quantile_return_type {
|
|
67
|
-
if (total_weight_ == 0) throw std::invalid_argument("supported for cumulative weight only");
|
|
68
|
-
uint64_t weight = static_cast<uint64_t>(rank * total_weight_);
|
|
69
|
-
auto it = std::lower_bound(entries_.begin(), entries_.end(), make_dummy_entry<T>(weight), compare_pairs_by_second());
|
|
70
|
-
if (it == entries_.end()) return deref_helper(entries_[entries_.size() - 1].first);
|
|
71
|
-
return deref_helper(it->first);
|
|
72
|
-
}
|
|
73
|
-
|
|
74
|
-
template<typename T, typename C, typename A>
|
|
75
|
-
auto quantile_sketch_sorted_view<T, C, A>::begin() const -> const_iterator {
|
|
76
|
-
return entries_.begin();
|
|
77
|
-
}
|
|
78
|
-
|
|
79
|
-
template<typename T, typename C, typename A>
|
|
80
|
-
auto quantile_sketch_sorted_view<T, C, A>::end() const -> const_iterator {
|
|
81
|
-
return entries_.end();
|
|
82
|
-
}
|
|
83
|
-
|
|
84
|
-
template<typename T, typename C, typename A>
|
|
85
|
-
size_t quantile_sketch_sorted_view<T, C, A>::size() const {
|
|
86
|
-
return entries_.size();
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
} /* namespace datasketches */
|
|
90
|
-
|
|
91
|
-
#endif
|