datasketches 0.3.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/NOTICE +1 -1
- data/README.md +0 -2
- data/ext/datasketches/cpc_wrapper.cpp +2 -2
- data/ext/datasketches/kll_wrapper.cpp +0 -10
- data/lib/datasketches/version.rb +1 -1
- data/lib/datasketches.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/CODE_OF_CONDUCT.md +3 -0
- data/vendor/datasketches-cpp/CONTRIBUTING.md +50 -0
- data/vendor/datasketches-cpp/Doxyfile +2827 -0
- data/vendor/datasketches-cpp/LICENSE +0 -76
- data/vendor/datasketches-cpp/NOTICE +1 -1
- data/vendor/datasketches-cpp/README.md +1 -3
- data/vendor/datasketches-cpp/common/CMakeLists.txt +12 -11
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +11 -8
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +0 -2
- data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov.hpp +9 -6
- data/vendor/datasketches-cpp/common/include/optional.hpp +148 -0
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view.hpp +95 -2
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +1 -1
- data/vendor/datasketches-cpp/common/include/serde.hpp +69 -20
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/common/test/optional_test.cpp +85 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +14 -14
- data/vendor/datasketches-cpp/count/include/count_min.hpp +132 -78
- data/vendor/datasketches-cpp/count/include/count_min_impl.hpp +132 -152
- data/vendor/datasketches-cpp/count/test/CMakeLists.txt +11 -12
- data/vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp +61 -61
- data/vendor/datasketches-cpp/count/test/count_min_test.cpp +175 -178
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +14 -20
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +7 -4
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +17 -17
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +40 -40
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +13 -10
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +35 -11
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +8 -8
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -2
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +5 -5
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +20 -7
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_deserialize_from_java_test.cpp +60 -0
- data/vendor/datasketches-cpp/{python/include/py_object_lt.hpp → cpc/test/cpc_sketch_serialize_for_java.cpp} +15 -14
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +4 -29
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +4 -4
- data/vendor/datasketches-cpp/density/include/density_sketch.hpp +29 -9
- data/vendor/datasketches-cpp/density/include/density_sketch_impl.hpp +1 -1
- data/vendor/datasketches-cpp/density/test/CMakeLists.txt +0 -1
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +21 -9
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +6 -4
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_deserialize_from_java_test.cpp +95 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_serialize_for_java.cpp +83 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +3 -42
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +3 -1
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +3 -3
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +5 -3
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +4 -4
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +3 -1
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +0 -12
- data/vendor/datasketches-cpp/hll/include/hll.hpp +70 -57
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +0 -68
- data/vendor/datasketches-cpp/hll/test/hll_sketch_deserialize_from_java_test.cpp +69 -0
- data/vendor/datasketches-cpp/hll/test/hll_sketch_serialize_for_java.cpp +52 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +2 -2
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +71 -50
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +59 -130
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/kll/test/kll_sketch_deserialize_from_java_test.cpp +103 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_serialize_for_java.cpp +62 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +3 -38
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +68 -51
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +62 -132
- data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_deserialize_from_java_test.cpp +84 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_serialize_for_java.cpp +52 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +14 -38
- data/vendor/datasketches-cpp/req/include/req_common.hpp +7 -3
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +2 -2
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +97 -23
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +48 -109
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/req/test/req_sketch_deserialize_from_java_test.cpp +55 -0
- data/vendor/datasketches-cpp/{tuple/include/array_of_doubles_intersection_impl.hpp → req/test/req_sketch_serialize_for_java.cpp} +12 -7
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +3 -89
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +4 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sample.hpp +210 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sample_impl.hpp +535 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sketch.hpp +281 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sketch_impl.hpp +531 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +69 -26
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +3 -3
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +10 -11
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +4 -4
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +55 -8
- data/vendor/datasketches-cpp/sampling/test/ebpps_allocation_test.cpp +96 -0
- data/vendor/datasketches-cpp/sampling/test/ebpps_sample_test.cpp +137 -0
- data/vendor/datasketches-cpp/sampling/test/ebpps_sketch_test.cpp +266 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_deserialize_from_java_test.cpp +81 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_serialize_for_java.cpp +54 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +0 -37
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_deserialize_from_java_test.cpp +50 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_serialize_for_java.cpp +56 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -18
- data/vendor/datasketches-cpp/theta/include/bit_packing.hpp +2608 -2608
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_theta_sketched_sets.hpp +7 -6
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +20 -5
- data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +10 -4
- data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +13 -5
- data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +5 -5
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +3 -3
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity.hpp +2 -1
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +1 -0
- data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +126 -27
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +8 -8
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +17 -10
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +3 -3
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +5 -2
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +11 -1
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/theta/test/theta_sketch_deserialize_from_java_test.cpp +57 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_serialize_for_java.cpp +61 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +0 -188
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +8 -7
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +19 -144
- data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b.hpp → array_tuple_a_not_b.hpp} +24 -16
- data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b_impl.hpp → array_tuple_a_not_b_impl.hpp} +4 -4
- data/vendor/datasketches-cpp/tuple/include/array_tuple_intersection.hpp +65 -0
- data/vendor/datasketches-cpp/{python/include/py_object_ostream.hpp → tuple/include/array_tuple_intersection_impl.hpp} +7 -24
- data/vendor/datasketches-cpp/tuple/include/array_tuple_sketch.hpp +237 -0
- data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_sketch_impl.hpp → array_tuple_sketch_impl.hpp} +40 -41
- data/vendor/datasketches-cpp/tuple/include/array_tuple_union.hpp +81 -0
- data/vendor/datasketches-cpp/tuple/include/array_tuple_union_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +11 -2
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +17 -10
- data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +2 -1
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +95 -32
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +19 -11
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +16 -1
- data/vendor/datasketches-cpp/tuple/test/aod_sketch_deserialize_from_java_test.cpp +76 -0
- data/vendor/datasketches-cpp/tuple/test/aod_sketch_serialize_for_java.cpp +62 -0
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +5 -129
- data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +85 -89
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +3 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_deserialize_from_java_test.cpp +47 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_serialize_for_java.cpp +38 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +1 -1
- data/vendor/datasketches-cpp/version.cfg.in +1 -1
- metadata +47 -93
- data/vendor/datasketches-cpp/MANIFEST.in +0 -39
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/pyproject.toml +0 -23
- data/vendor/datasketches-cpp/python/CMakeLists.txt +0 -87
- data/vendor/datasketches-cpp/python/README.md +0 -85
- data/vendor/datasketches-cpp/python/datasketches/DensityWrapper.py +0 -87
- data/vendor/datasketches-cpp/python/datasketches/KernelFunction.py +0 -35
- data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +0 -110
- data/vendor/datasketches-cpp/python/datasketches/TuplePolicy.py +0 -77
- data/vendor/datasketches-cpp/python/datasketches/TupleWrapper.py +0 -205
- data/vendor/datasketches-cpp/python/datasketches/__init__.py +0 -38
- data/vendor/datasketches-cpp/python/include/kernel_function.hpp +0 -98
- data/vendor/datasketches-cpp/python/include/py_serde.hpp +0 -113
- data/vendor/datasketches-cpp/python/include/quantile_conditional.hpp +0 -104
- data/vendor/datasketches-cpp/python/include/tuple_policy.hpp +0 -136
- data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +0 -345
- data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +0 -354
- data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +0 -346
- data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +0 -463
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +0 -403
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +0 -21
- data/vendor/datasketches-cpp/python/src/__init__.py +0 -18
- data/vendor/datasketches-cpp/python/src/count_wrapper.cpp +0 -101
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +0 -76
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +0 -58
- data/vendor/datasketches-cpp/python/src/density_wrapper.cpp +0 -95
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +0 -182
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -126
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +0 -158
- data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +0 -68
- data/vendor/datasketches-cpp/python/src/py_serde.cpp +0 -112
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +0 -155
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +0 -154
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +0 -166
- data/vendor/datasketches-cpp/python/src/tuple_wrapper.cpp +0 -215
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +0 -490
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +0 -173
- data/vendor/datasketches-cpp/python/tests/__init__.py +0 -16
- data/vendor/datasketches-cpp/python/tests/count_min_test.py +0 -86
- data/vendor/datasketches-cpp/python/tests/cpc_test.py +0 -64
- data/vendor/datasketches-cpp/python/tests/density_test.py +0 -93
- data/vendor/datasketches-cpp/python/tests/fi_test.py +0 -149
- data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -129
- data/vendor/datasketches-cpp/python/tests/kll_test.py +0 -159
- data/vendor/datasketches-cpp/python/tests/quantiles_test.py +0 -160
- data/vendor/datasketches-cpp/python/tests/req_test.py +0 -159
- data/vendor/datasketches-cpp/python/tests/theta_test.py +0 -148
- data/vendor/datasketches-cpp/python/tests/tuple_test.py +0 -206
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +0 -148
- data/vendor/datasketches-cpp/python/tests/vo_test.py +0 -132
- data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +0 -67
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
- data/vendor/datasketches-cpp/setup.py +0 -110
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tox.ini +0 -26
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +0 -52
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +0 -81
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +0 -43
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +0 -1
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +0 -1
|
@@ -1,148 +0,0 @@
|
|
|
1
|
-
# Licensed to the Apache Software Foundation (ASF) under one
|
|
2
|
-
# or more contributor license agreements. See the NOTICE file
|
|
3
|
-
# distributed with this work for additional information
|
|
4
|
-
# regarding copyright ownership. The ASF licenses this file
|
|
5
|
-
# to you under the Apache License, Version 2.0 (the
|
|
6
|
-
# "License"); you may not use this file except in compliance
|
|
7
|
-
# with the License. You may obtain a copy of the License at
|
|
8
|
-
#
|
|
9
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
-
#
|
|
11
|
-
# Unless required by applicable law or agreed to in writing,
|
|
12
|
-
# software distributed under the License is distributed on an
|
|
13
|
-
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
-
# KIND, either express or implied. See the License for the
|
|
15
|
-
# specific language governing permissions and limitations
|
|
16
|
-
# under the License.
|
|
17
|
-
|
|
18
|
-
import unittest
|
|
19
|
-
from datasketches import (vector_of_kll_ints_sketches,
|
|
20
|
-
vector_of_kll_floats_sketches)
|
|
21
|
-
import numpy as np
|
|
22
|
-
|
|
23
|
-
class VectorOfKllSketchesTest(unittest.TestCase):
|
|
24
|
-
def test_vector_of_kll_floats_sketches_example(self):
|
|
25
|
-
k = 200
|
|
26
|
-
d = 3
|
|
27
|
-
n = 2 ** 20
|
|
28
|
-
|
|
29
|
-
# create a sketch and inject ~1 million N(0,1) points
|
|
30
|
-
kll = vector_of_kll_floats_sketches(k, d)
|
|
31
|
-
# Track the min/max for each sketch to test later
|
|
32
|
-
smin = np.zeros(d) + np.inf
|
|
33
|
-
smax = np.zeros(d) - np.inf
|
|
34
|
-
|
|
35
|
-
for i in range(0, n):
|
|
36
|
-
dat = np.random.randn(d)
|
|
37
|
-
smin = np.amin([smin, dat], axis=0)
|
|
38
|
-
smax = np.amax([smax, dat], axis=0)
|
|
39
|
-
kll.update(dat)
|
|
40
|
-
|
|
41
|
-
# 0 should be near the median
|
|
42
|
-
np.testing.assert_allclose(0.5, kll.get_ranks(0.0), atol=0.035)
|
|
43
|
-
# the median should be near 0
|
|
44
|
-
np.testing.assert_allclose(0.0, kll.get_quantiles(0.5), atol=0.035)
|
|
45
|
-
# we also track the min/max independently from the rest of the data
|
|
46
|
-
# which lets us know the full observed data range
|
|
47
|
-
np.testing.assert_allclose(kll.get_min_values(), smin)
|
|
48
|
-
np.testing.assert_allclose(kll.get_max_values(), smax)
|
|
49
|
-
np.testing.assert_array_less(kll.get_min_values(), kll.get_quantiles(0.01)[:,0])
|
|
50
|
-
np.testing.assert_array_less(kll.get_quantiles(0.99)[:,0], kll.get_max_values())
|
|
51
|
-
|
|
52
|
-
# we can also extract a list of values at a time,
|
|
53
|
-
# here the values should give us something close to [-2, -1, 0, 1, 2].
|
|
54
|
-
# then get the CDF, which will return something close to
|
|
55
|
-
# the original values used in get_quantiles()
|
|
56
|
-
# finally, can check the normalized rank error bound
|
|
57
|
-
pts = kll.get_quantiles([0.0228, 0.1587, 0.5, 0.8413, 0.9772])
|
|
58
|
-
# use the mean pts for the CDF, include 1.0 at end to account for all probability mass
|
|
59
|
-
meanpts = np.mean(pts, axis=0)
|
|
60
|
-
cdf = kll.get_cdf(meanpts)
|
|
61
|
-
self.assertEqual(cdf.shape[0], pts.shape[0])
|
|
62
|
-
self.assertEqual(cdf.shape[1], pts.shape[1]+1)
|
|
63
|
-
|
|
64
|
-
# and a few basic queries about the sketch
|
|
65
|
-
self.assertFalse(np.all(kll.is_empty()))
|
|
66
|
-
self.assertTrue(np.all(kll.is_estimation_mode()))
|
|
67
|
-
self.assertTrue(np.all(kll.get_n() == n))
|
|
68
|
-
self.assertTrue(np.all(kll.get_num_retained() < n))
|
|
69
|
-
|
|
70
|
-
# we can combine sketches across all dimensions and get the result
|
|
71
|
-
result = kll.collapse()
|
|
72
|
-
self.assertEqual(result.get_n(), d * n)
|
|
73
|
-
|
|
74
|
-
# merging a copy of itself will double the number of items the sketch has seen
|
|
75
|
-
kll_copy = vector_of_kll_floats_sketches(kll)
|
|
76
|
-
kll.merge(kll_copy)
|
|
77
|
-
np.testing.assert_equal(kll.get_n(), 2*n)
|
|
78
|
-
|
|
79
|
-
# we can then serialize and reconstruct the sketch
|
|
80
|
-
kll_bytes = kll.serialize() # serializes each sketch as a list
|
|
81
|
-
new_kll = vector_of_kll_floats_sketches(k, d)
|
|
82
|
-
for s in range(len(kll_bytes)):
|
|
83
|
-
new_kll.deserialize(kll_bytes[s], s)
|
|
84
|
-
|
|
85
|
-
# everything should be exactly equal
|
|
86
|
-
np.testing.assert_equal(kll.get_num_retained(), new_kll.get_num_retained())
|
|
87
|
-
np.testing.assert_equal(kll.get_min_values(), new_kll.get_min_values())
|
|
88
|
-
np.testing.assert_equal(kll.get_max_values(), new_kll.get_max_values())
|
|
89
|
-
np.testing.assert_equal(kll.get_quantiles(0.7), new_kll.get_quantiles(0.7))
|
|
90
|
-
np.testing.assert_equal(kll.get_ranks(0.0), new_kll.get_ranks(0.0))
|
|
91
|
-
|
|
92
|
-
def test_kll_ints_sketches(self):
|
|
93
|
-
# already tested floats and it's templatized, so just make sure it instantiates properly
|
|
94
|
-
k = 100
|
|
95
|
-
d = 5
|
|
96
|
-
kll = vector_of_kll_ints_sketches(k, d)
|
|
97
|
-
self.assertTrue(np.all(kll.is_empty()))
|
|
98
|
-
|
|
99
|
-
def test_kll_2Dupdates(self):
|
|
100
|
-
# 1D case tested in the first example
|
|
101
|
-
# 2D case will follow same idea, but focusing on update()
|
|
102
|
-
k = 200
|
|
103
|
-
d = 3
|
|
104
|
-
# we'll do ~250k updates of 4 values each (total ~1mil updates, as above)
|
|
105
|
-
n = 2 ** 18
|
|
106
|
-
nbatch = 4
|
|
107
|
-
|
|
108
|
-
# create a sketch and inject ~1 million N(0,1) points
|
|
109
|
-
kll = vector_of_kll_floats_sketches(k, d)
|
|
110
|
-
# Track the min/max for each sketch to test later
|
|
111
|
-
smin = np.zeros(d) + np.inf
|
|
112
|
-
smax = np.zeros(d) - np.inf
|
|
113
|
-
|
|
114
|
-
for i in range(0, n):
|
|
115
|
-
dat = np.random.randn(nbatch, d)
|
|
116
|
-
smin = np.amin(np.row_stack((smin, dat)), axis=0)
|
|
117
|
-
smax = np.amax(np.row_stack((smax, dat)), axis=0)
|
|
118
|
-
kll.update(dat)
|
|
119
|
-
|
|
120
|
-
# 0 should be near the median
|
|
121
|
-
np.testing.assert_allclose(0.5, kll.get_ranks(0.0), atol=0.035)
|
|
122
|
-
# the median should be near 0
|
|
123
|
-
np.testing.assert_allclose(0.0, kll.get_quantiles(0.5), atol=0.035)
|
|
124
|
-
# we also track the min/max independently from the rest of the data
|
|
125
|
-
# which lets us know the full observed data range
|
|
126
|
-
np.testing.assert_allclose(kll.get_min_values(), smin)
|
|
127
|
-
np.testing.assert_allclose(kll.get_max_values(), smax)
|
|
128
|
-
|
|
129
|
-
def test_kll_3Dupdates(self):
|
|
130
|
-
# now test 3D update, which should fail
|
|
131
|
-
k = 200
|
|
132
|
-
d = 3
|
|
133
|
-
|
|
134
|
-
# create a sketch
|
|
135
|
-
kll = vector_of_kll_floats_sketches(k, d)
|
|
136
|
-
|
|
137
|
-
# we'll try 1 3D update
|
|
138
|
-
dat = np.random.randn(10, 7, d)
|
|
139
|
-
try:
|
|
140
|
-
kll.update(dat)
|
|
141
|
-
except:
|
|
142
|
-
# this is what we expect
|
|
143
|
-
pass
|
|
144
|
-
# the sketches should still be empty
|
|
145
|
-
self.assertTrue(np.all(kll.is_empty()))
|
|
146
|
-
|
|
147
|
-
if __name__ == '__main__':
|
|
148
|
-
unittest.main()
|
|
@@ -1,132 +0,0 @@
|
|
|
1
|
-
# Licensed to the Apache Software Foundation (ASF) under one
|
|
2
|
-
# or more contributor license agreements. See the NOTICE file
|
|
3
|
-
# distributed with this work for additional information
|
|
4
|
-
# regarding copyright ownership. The ASF licenses this file
|
|
5
|
-
# to you under the Apache License, Version 2.0 (the
|
|
6
|
-
# "License"); you may not use this file except in compliance
|
|
7
|
-
# with the License. You may obtain a copy of the License at
|
|
8
|
-
#
|
|
9
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
-
#
|
|
11
|
-
# Unless required by applicable law or agreed to in writing,
|
|
12
|
-
# software distributed under the License is distributed on an
|
|
13
|
-
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
-
# KIND, either express or implied. See the License for the
|
|
15
|
-
# specific language governing permissions and limitations
|
|
16
|
-
# under the License.
|
|
17
|
-
|
|
18
|
-
import unittest
|
|
19
|
-
from datasketches import var_opt_sketch, var_opt_union, PyIntsSerDe, PyStringsSerDe
|
|
20
|
-
|
|
21
|
-
class VoTest(unittest.TestCase):
|
|
22
|
-
def test_vo_example(self):
|
|
23
|
-
k = 50 # a small value so we can easily fill the sketch
|
|
24
|
-
vo = var_opt_sketch(k)
|
|
25
|
-
|
|
26
|
-
# varopt sampling reduces to standard reservoir sampling
|
|
27
|
-
# if the items are all equally weighted, although the
|
|
28
|
-
# algorithm will be significantly slower than an optimized
|
|
29
|
-
# reservoir sampler
|
|
30
|
-
n = 5 * k
|
|
31
|
-
for i in range(0, n):
|
|
32
|
-
vo.update(i)
|
|
33
|
-
|
|
34
|
-
# we can also add a heavy item, using a negative weight for
|
|
35
|
-
# easy filtering later. keep in mind that "heavy" is a
|
|
36
|
-
# relative concept, so using a fixed multiple of n may not
|
|
37
|
-
# be considered a heavy item for larger values of n
|
|
38
|
-
vo.update(-1, 1000 * n)
|
|
39
|
-
self.assertEqual(k, vo.k)
|
|
40
|
-
self.assertEqual(k, vo.num_samples)
|
|
41
|
-
self.assertEqual(n + 1, vo.n)
|
|
42
|
-
self.assertFalse(vo.is_empty())
|
|
43
|
-
|
|
44
|
-
# we can easily get the list of items in the sample
|
|
45
|
-
items = vo.get_samples()
|
|
46
|
-
self.assertEqual(len(items), k)
|
|
47
|
-
|
|
48
|
-
count = 0
|
|
49
|
-
for tuple in vo:
|
|
50
|
-
sample = tuple[0]
|
|
51
|
-
weight = tuple[1]
|
|
52
|
-
count = count + 1
|
|
53
|
-
self.assertEqual(count, vo.num_samples)
|
|
54
|
-
|
|
55
|
-
# we can also apply a predicate to the sketch to get an estimate
|
|
56
|
-
# (with optimally minimal variance) of the subset sum of items
|
|
57
|
-
# matching that predicate among the entire population
|
|
58
|
-
|
|
59
|
-
# we'll use a lambda here, but any function operating on a single
|
|
60
|
-
# item which returns a boolean value should work
|
|
61
|
-
summary = vo.estimate_subset_sum(lambda x: x < 0)
|
|
62
|
-
self.assertEqual(summary['estimate'], 1000 * n)
|
|
63
|
-
self.assertEqual(summary['total_sketch_weight'], 1001 * n)
|
|
64
|
-
|
|
65
|
-
# a regular function is similarly handled
|
|
66
|
-
def geq_zero(x):
|
|
67
|
-
return x >= 0
|
|
68
|
-
summary = vo.estimate_subset_sum(geq_zero)
|
|
69
|
-
self.assertEqual(summary['estimate'], n)
|
|
70
|
-
self.assertEqual(summary['total_sketch_weight'], 1001 * n)
|
|
71
|
-
|
|
72
|
-
# next we'll create a second, smaller sketch with
|
|
73
|
-
# only heavier items relative to the previous sketch,
|
|
74
|
-
# but with the sketch in sampling mode
|
|
75
|
-
k2 = 5
|
|
76
|
-
vo2 = var_opt_sketch(k2)
|
|
77
|
-
# for weight, use the estimate of all items >=0 from before
|
|
78
|
-
wt = summary['estimate']
|
|
79
|
-
for i in range(0, k2 + 1):
|
|
80
|
-
vo2.update((2 * n) + i, wt)
|
|
81
|
-
|
|
82
|
-
# now union the sketches, demonstrating how the
|
|
83
|
-
# union's k may not be equal to that of either
|
|
84
|
-
# input value
|
|
85
|
-
union = var_opt_union(k)
|
|
86
|
-
union.update(vo)
|
|
87
|
-
union.update(vo2)
|
|
88
|
-
|
|
89
|
-
result = union.get_result()
|
|
90
|
-
self.assertEqual(n + k2 + 2, result.n)
|
|
91
|
-
self.assertFalse(result.is_empty())
|
|
92
|
-
self.assertGreater(result.k, k2)
|
|
93
|
-
self.assertLess(result.k, k)
|
|
94
|
-
|
|
95
|
-
# we can compare what information is available from both
|
|
96
|
-
# the union and a sketch.
|
|
97
|
-
print(union)
|
|
98
|
-
|
|
99
|
-
# if we want to print the list of items, there must be a
|
|
100
|
-
# __str__() method for each item (which need not be the same
|
|
101
|
-
# type; they're all generic python objects when used from
|
|
102
|
-
# python), otherwise you may trigger an exception.
|
|
103
|
-
# to_string() is provided as a convenience to avoid direct
|
|
104
|
-
# calls to __str__() with parameters.
|
|
105
|
-
print(result.to_string(True))
|
|
106
|
-
|
|
107
|
-
# finally, we can serialize the sketch by providing an
|
|
108
|
-
# appropriate serde class.
|
|
109
|
-
expected_size = result.get_serialized_size_bytes(PyIntsSerDe())
|
|
110
|
-
b = result.serialize(PyIntsSerDe())
|
|
111
|
-
self.assertEqual(expected_size, len(b))
|
|
112
|
-
|
|
113
|
-
# if we try to deserialize with the wrong serde, things break
|
|
114
|
-
try:
|
|
115
|
-
var_opt_sketch.deserialize(b, PyStringsSerDe())
|
|
116
|
-
self.fail()
|
|
117
|
-
except:
|
|
118
|
-
# expected; do nothing
|
|
119
|
-
self.assertTrue(True)
|
|
120
|
-
|
|
121
|
-
# using the correct serde gives us back a copy of the original
|
|
122
|
-
rebuilt = var_opt_sketch.deserialize(b, PyIntsSerDe())
|
|
123
|
-
self.assertEqual(result.k, rebuilt.k)
|
|
124
|
-
self.assertEqual(result.num_samples, rebuilt.num_samples)
|
|
125
|
-
self.assertEqual(result.n, rebuilt.n)
|
|
126
|
-
summary1 = result.estimate_subset_sum(geq_zero)
|
|
127
|
-
summary2 = rebuilt.estimate_subset_sum(geq_zero)
|
|
128
|
-
self.assertEqual(summary1['estimate'], summary2['estimate'])
|
|
129
|
-
self.assertEqual(summary1['total_sketch_weight'], summary2['total_sketch_weight'])
|
|
130
|
-
|
|
131
|
-
if __name__ == '__main__':
|
|
132
|
-
unittest.main()
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -1,67 +0,0 @@
|
|
|
1
|
-
/*
|
|
2
|
-
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
-
* or more contributor license agreements. See the NOTICE file
|
|
4
|
-
* distributed with this work for additional information
|
|
5
|
-
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
-
* to you under the Apache License, Version 2.0 (the
|
|
7
|
-
* "License"); you may not use this file except in compliance
|
|
8
|
-
* with the License. You may obtain a copy of the License at
|
|
9
|
-
*
|
|
10
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
-
*
|
|
12
|
-
* Unless required by applicable law or agreed to in writing,
|
|
13
|
-
* software distributed under the License is distributed on an
|
|
14
|
-
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
-
* KIND, either express or implied. See the License for the
|
|
16
|
-
* specific language governing permissions and limitations
|
|
17
|
-
* under the License.
|
|
18
|
-
*/
|
|
19
|
-
|
|
20
|
-
Code snippets used to generate the binary images from Java.
|
|
21
|
-
Heavy items have negative weights to allow a simple predicate to filter
|
|
22
|
-
heavy vs light sketch entries.
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
varopt_sketch_long_sampling.bin:
|
|
26
|
-
final VarOptItemsSketch<String> sk = VarOptItemsSketch.newInstance(1024);
|
|
27
|
-
for (int i = 1; i <= 200; ++i) {
|
|
28
|
-
sk.update(Integer.toString(i), 1000.0 / i);
|
|
29
|
-
}
|
|
30
|
-
byte[] bytes = sk.toByteArray(new ArrayOfStringsSerDe());
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
varopt_sketch_string_exact.bin:
|
|
34
|
-
final VarOptItemsSketch<Long> sk = VarOptItemsSketch.newInstance(1024);
|
|
35
|
-
for (long i = 0; i < 2000; ++i) {
|
|
36
|
-
sk.update(i, 1.0);
|
|
37
|
-
}
|
|
38
|
-
sk.update(-1L, 100000.0);
|
|
39
|
-
sk.update(-2L, 110000.0);
|
|
40
|
-
sk.update(-3L, 120000.0);
|
|
41
|
-
byte[] bytes = sk.toByteArray(new ArrayOfLongsSerDe());
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
varopt_union_double_sampling.bin:
|
|
45
|
-
// parallels small sampling sketch test
|
|
46
|
-
final int kSmall = 16;
|
|
47
|
-
final int n1 = 32;
|
|
48
|
-
final int n2 = 64;
|
|
49
|
-
final int kMax = 128;
|
|
50
|
-
|
|
51
|
-
// small k sketch, but sampling
|
|
52
|
-
VarOptItemsSketch<Double> sketch = VarOptItemsSketch.newInstance(kSmall);
|
|
53
|
-
for (int i = 0; i < n1; ++i) {
|
|
54
|
-
sketch.update(1.0 * i, 1.0);
|
|
55
|
-
}
|
|
56
|
-
sketch.update(-1.0, n1 * n1); // add a heavy item
|
|
57
|
-
|
|
58
|
-
final VarOptItemsUnion<Double> union = VarOptItemsUnion.newInstance(kMax);
|
|
59
|
-
union.update(sketch);
|
|
60
|
-
|
|
61
|
-
// another one, but different n to get a different per-item weight
|
|
62
|
-
sketch = VarOptItemsSketch.newInstance(kSmall);
|
|
63
|
-
for (int i = 0; i < n2; ++i) {
|
|
64
|
-
sketch.update(1.0 * i, 1.0);
|
|
65
|
-
}
|
|
66
|
-
union.update(sketch);
|
|
67
|
-
byte[] bytes = union.toByteArray(new ArrayOfDoublesSerDe());
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -1,110 +0,0 @@
|
|
|
1
|
-
# Licensed to the Apache Software Foundation (ASF) under one
|
|
2
|
-
# or more contributor license agreements. See the NOTICE file
|
|
3
|
-
# distributed with this work for additional information
|
|
4
|
-
# regarding copyright ownership. The ASF licenses this file
|
|
5
|
-
# to you under the Apache License, Version 2.0 (the
|
|
6
|
-
# "License"); you may not use this file except in compliance
|
|
7
|
-
# with the License. You may obtain a copy of the License at
|
|
8
|
-
#
|
|
9
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
-
#
|
|
11
|
-
# Unless required by applicable law or agreed to in writing,
|
|
12
|
-
# software distributed under the License is distributed on an
|
|
13
|
-
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
-
# KIND, either express or implied. See the License for the
|
|
15
|
-
# specific language governing permissions and limitations
|
|
16
|
-
# under the License.
|
|
17
|
-
|
|
18
|
-
# Modified from:
|
|
19
|
-
# http://www.benjack.io/2018/02/02/python-cpp-revisited.html
|
|
20
|
-
|
|
21
|
-
import os
|
|
22
|
-
import sys
|
|
23
|
-
import platform
|
|
24
|
-
import subprocess
|
|
25
|
-
import re
|
|
26
|
-
from datetime import datetime, timezone
|
|
27
|
-
|
|
28
|
-
from setuptools import setup, find_packages, Extension
|
|
29
|
-
from setuptools.command.build_ext import build_ext
|
|
30
|
-
|
|
31
|
-
class CMakeExtension(Extension):
|
|
32
|
-
def __init__(self, name, sourcedir=''):
|
|
33
|
-
Extension.__init__(self, name, sources=[])
|
|
34
|
-
self.sourcedir = os.path.abspath(sourcedir)
|
|
35
|
-
|
|
36
|
-
class CMakeBuild(build_ext):
|
|
37
|
-
def run(self):
|
|
38
|
-
try:
|
|
39
|
-
subprocess.check_output(['cmake', '--version'])
|
|
40
|
-
except OSError:
|
|
41
|
-
raise RuntimeError(
|
|
42
|
-
"CMake >= 3.12 must be installed to build the following extensions: " +
|
|
43
|
-
", ".join(e.name for e in self.extensions))
|
|
44
|
-
|
|
45
|
-
for ext in self.extensions:
|
|
46
|
-
self.build_extension(ext)
|
|
47
|
-
|
|
48
|
-
def build_extension(self, ext):
|
|
49
|
-
extdir = os.path.abspath(
|
|
50
|
-
os.path.dirname(self.get_ext_fullpath(ext.name)))
|
|
51
|
-
cmake_args = ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=' + extdir]
|
|
52
|
-
cmake_args += ['-DWITH_PYTHON=True']
|
|
53
|
-
cmake_args += ['-DCMAKE_CXX_STANDARD=11']
|
|
54
|
-
# ensure we use a consistent python version
|
|
55
|
-
cmake_args += ['-DPython3_EXECUTABLE=' + sys.executable]
|
|
56
|
-
cfg = 'Debug' if self.debug else 'Release'
|
|
57
|
-
build_args = ['--config', cfg]
|
|
58
|
-
|
|
59
|
-
if platform.system() == "Windows":
|
|
60
|
-
cmake_args += ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{}={}'.format(
|
|
61
|
-
cfg.upper(),
|
|
62
|
-
extdir)]
|
|
63
|
-
if sys.maxsize > 2**32:
|
|
64
|
-
cmake_args += ['-T', 'host=x64']
|
|
65
|
-
cmake_args += ['-DCMAKE_GENERATOR_PLATFORM=x64']
|
|
66
|
-
build_args += ['--', '/m']
|
|
67
|
-
else:
|
|
68
|
-
cmake_args += ['-DCMAKE_BUILD_TYPE=' + cfg]
|
|
69
|
-
build_args += ['--', '-j2']
|
|
70
|
-
|
|
71
|
-
env = os.environ.copy()
|
|
72
|
-
env['CXXFLAGS'] = '{} -DVERSION_INFO=\\"{}\\"'.format(
|
|
73
|
-
env.get('CXXFLAGS', ''),
|
|
74
|
-
self.distribution.get_version())
|
|
75
|
-
if not os.path.exists(self.build_temp):
|
|
76
|
-
os.makedirs(self.build_temp)
|
|
77
|
-
subprocess.check_call(['cmake', ext.sourcedir] + cmake_args,
|
|
78
|
-
cwd=self.build_temp, env=env)
|
|
79
|
-
subprocess.check_call(['cmake', '--build', '.', '--target', 'python'] + build_args,
|
|
80
|
-
cwd=self.build_temp, env=env)
|
|
81
|
-
print() # add an empty line to pretty print
|
|
82
|
-
|
|
83
|
-
# Read and parse the version format
|
|
84
|
-
# @DT@ -> datestamp
|
|
85
|
-
# @HHMM@ -> .devHHMM to indicate development version
|
|
86
|
-
# Releases should have a fixed version with no @ variables
|
|
87
|
-
with open('version.cfg.in', 'r') as file:
|
|
88
|
-
ds_version = file.read().rstrip()
|
|
89
|
-
dt = datetime.now(timezone.utc)
|
|
90
|
-
ds_version = re.sub('@DT@', dt.strftime('%Y%m%d'), ds_version)
|
|
91
|
-
ds_version = re.sub('@HHMM@', 'dev' + dt.strftime('%H%M'), ds_version)
|
|
92
|
-
|
|
93
|
-
setup(
|
|
94
|
-
name='datasketches',
|
|
95
|
-
version=ds_version,
|
|
96
|
-
author='Apache Software Foundation',
|
|
97
|
-
author_email='dev@datasketches.apache.org',
|
|
98
|
-
description='The Apache DataSketches Library for Python',
|
|
99
|
-
license='Apache License 2.0',
|
|
100
|
-
url='http://datasketches.apache.org',
|
|
101
|
-
long_description=open('python/README.md').read(),
|
|
102
|
-
long_description_content_type='text/markdown',
|
|
103
|
-
packages=find_packages(where='python',exclude=['src','include','*tests*']), # src not needed if only the .so
|
|
104
|
-
package_dir={'':'python'},
|
|
105
|
-
# may need to add all source paths for sdist packages w/o MANIFEST.in
|
|
106
|
-
ext_modules=[CMakeExtension('datasketches')],
|
|
107
|
-
cmdclass={'build_ext': CMakeBuild},
|
|
108
|
-
install_requires=['numpy'],
|
|
109
|
-
zip_safe=False
|
|
110
|
-
)
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -1,26 +0,0 @@
|
|
|
1
|
-
# Licensed to the Apache Software Foundation (ASF) under one
|
|
2
|
-
# or more contributor license agreements. See the NOTICE file
|
|
3
|
-
# distributed with this work for additional information
|
|
4
|
-
# regarding copyright ownership. The ASF licenses this file
|
|
5
|
-
# to you under the Apache License, Version 2.0 (the
|
|
6
|
-
# "License"); you may not use this file except in compliance
|
|
7
|
-
# with the License. You may obtain a copy of the License at
|
|
8
|
-
#
|
|
9
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
-
#
|
|
11
|
-
# Unless required by applicable law or agreed to in writing,
|
|
12
|
-
# software distributed under the License is distributed on an
|
|
13
|
-
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
-
# KIND, either express or implied. See the License for the
|
|
15
|
-
# specific language governing permissions and limitations
|
|
16
|
-
# under the License.
|
|
17
|
-
|
|
18
|
-
[tox]
|
|
19
|
-
envlist = py3
|
|
20
|
-
isolated_build = true
|
|
21
|
-
|
|
22
|
-
[testenv]
|
|
23
|
-
deps = pytest
|
|
24
|
-
numpy
|
|
25
|
-
changedir = python/tests
|
|
26
|
-
commands = pytest
|
|
@@ -1,52 +0,0 @@
|
|
|
1
|
-
/*
|
|
2
|
-
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
-
* or more contributor license agreements. See the NOTICE file
|
|
4
|
-
* distributed with this work for additional information
|
|
5
|
-
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
-
* to you under the Apache License, Version 2.0 (the
|
|
7
|
-
* "License"); you may not use this file except in compliance
|
|
8
|
-
* with the License. You may obtain a copy of the License at
|
|
9
|
-
*
|
|
10
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
-
*
|
|
12
|
-
* Unless required by applicable law or agreed to in writing,
|
|
13
|
-
* software distributed under the License is distributed on an
|
|
14
|
-
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
-
* KIND, either express or implied. See the License for the
|
|
16
|
-
* specific language governing permissions and limitations
|
|
17
|
-
* under the License.
|
|
18
|
-
*/
|
|
19
|
-
|
|
20
|
-
#ifndef ARRAY_OF_DOUBLES_INTERSECTION_HPP_
|
|
21
|
-
#define ARRAY_OF_DOUBLES_INTERSECTION_HPP_
|
|
22
|
-
|
|
23
|
-
#include <vector>
|
|
24
|
-
#include <memory>
|
|
25
|
-
|
|
26
|
-
#include "array_of_doubles_sketch.hpp"
|
|
27
|
-
#include "tuple_intersection.hpp"
|
|
28
|
-
|
|
29
|
-
namespace datasketches {
|
|
30
|
-
|
|
31
|
-
template<
|
|
32
|
-
typename Policy,
|
|
33
|
-
typename Allocator = std::allocator<double>
|
|
34
|
-
>
|
|
35
|
-
class array_of_doubles_intersection: public tuple_intersection<aod<Allocator>, Policy, AllocAOD<Allocator>> {
|
|
36
|
-
public:
|
|
37
|
-
using Summary = aod<Allocator>;
|
|
38
|
-
using AllocSummary = AllocAOD<Allocator>;
|
|
39
|
-
using Base = tuple_intersection<Summary, Policy, AllocSummary>;
|
|
40
|
-
using CompactSketch = compact_array_of_doubles_sketch_alloc<Allocator>;
|
|
41
|
-
using resize_factor = theta_constants::resize_factor;
|
|
42
|
-
|
|
43
|
-
explicit array_of_doubles_intersection(uint64_t seed = DEFAULT_SEED, const Policy& policy = Policy(), const Allocator& allocator = Allocator());
|
|
44
|
-
|
|
45
|
-
CompactSketch get_result(bool ordered = true) const;
|
|
46
|
-
};
|
|
47
|
-
|
|
48
|
-
} /* namespace datasketches */
|
|
49
|
-
|
|
50
|
-
#include "array_of_doubles_intersection_impl.hpp"
|
|
51
|
-
|
|
52
|
-
#endif
|
|
@@ -1,81 +0,0 @@
|
|
|
1
|
-
/*
|
|
2
|
-
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
-
* or more contributor license agreements. See the NOTICE file
|
|
4
|
-
* distributed with this work for additional information
|
|
5
|
-
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
-
* to you under the Apache License, Version 2.0 (the
|
|
7
|
-
* "License"); you may not use this file except in compliance
|
|
8
|
-
* with the License. You may obtain a copy of the License at
|
|
9
|
-
*
|
|
10
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
-
*
|
|
12
|
-
* Unless required by applicable law or agreed to in writing,
|
|
13
|
-
* software distributed under the License is distributed on an
|
|
14
|
-
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
-
* KIND, either express or implied. See the License for the
|
|
16
|
-
* specific language governing permissions and limitations
|
|
17
|
-
* under the License.
|
|
18
|
-
*/
|
|
19
|
-
|
|
20
|
-
#ifndef ARRAY_OF_DOUBLES_UNION_HPP_
|
|
21
|
-
#define ARRAY_OF_DOUBLES_UNION_HPP_
|
|
22
|
-
|
|
23
|
-
#include <vector>
|
|
24
|
-
#include <memory>
|
|
25
|
-
|
|
26
|
-
#include "array_of_doubles_sketch.hpp"
|
|
27
|
-
#include "tuple_union.hpp"
|
|
28
|
-
|
|
29
|
-
namespace datasketches {
|
|
30
|
-
|
|
31
|
-
template<typename A = std::allocator<double>>
|
|
32
|
-
struct array_of_doubles_union_policy_alloc {
|
|
33
|
-
array_of_doubles_union_policy_alloc(uint8_t num_values = 1): num_values_(num_values) {}
|
|
34
|
-
|
|
35
|
-
void operator()(aod<A>& summary, const aod<A>& other) const {
|
|
36
|
-
for (size_t i = 0; i < summary.size(); ++i) {
|
|
37
|
-
summary[i] += other[i];
|
|
38
|
-
}
|
|
39
|
-
}
|
|
40
|
-
|
|
41
|
-
uint8_t get_num_values() const {
|
|
42
|
-
return num_values_;
|
|
43
|
-
}
|
|
44
|
-
private:
|
|
45
|
-
uint8_t num_values_;
|
|
46
|
-
};
|
|
47
|
-
|
|
48
|
-
using array_of_doubles_union_policy = array_of_doubles_union_policy_alloc<>;
|
|
49
|
-
|
|
50
|
-
template<typename Allocator = std::allocator<double>>
|
|
51
|
-
class array_of_doubles_union_alloc: public tuple_union<aod<Allocator>, array_of_doubles_union_policy_alloc<Allocator>, AllocAOD<Allocator>> {
|
|
52
|
-
public:
|
|
53
|
-
using Policy = array_of_doubles_union_policy_alloc<Allocator>;
|
|
54
|
-
using Base = tuple_union<aod<Allocator>, Policy, AllocAOD<Allocator>>;
|
|
55
|
-
using CompactSketch = compact_array_of_doubles_sketch_alloc<Allocator>;
|
|
56
|
-
using resize_factor = theta_constants::resize_factor;
|
|
57
|
-
|
|
58
|
-
class builder;
|
|
59
|
-
|
|
60
|
-
CompactSketch get_result(bool ordered = true) const;
|
|
61
|
-
|
|
62
|
-
private:
|
|
63
|
-
// for builder
|
|
64
|
-
array_of_doubles_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator);
|
|
65
|
-
};
|
|
66
|
-
|
|
67
|
-
template<typename Allocator>
|
|
68
|
-
class array_of_doubles_union_alloc<Allocator>::builder: public tuple_base_builder<builder, array_of_doubles_union_policy_alloc<Allocator>, Allocator> {
|
|
69
|
-
public:
|
|
70
|
-
builder(const array_of_doubles_union_policy_alloc<Allocator>& policy = array_of_doubles_union_policy_alloc<Allocator>(), const Allocator& allocator = Allocator());
|
|
71
|
-
array_of_doubles_union_alloc<Allocator> build() const;
|
|
72
|
-
};
|
|
73
|
-
|
|
74
|
-
// alias with default allocator
|
|
75
|
-
using array_of_doubles_union = array_of_doubles_union_alloc<>;
|
|
76
|
-
|
|
77
|
-
} /* namespace datasketches */
|
|
78
|
-
|
|
79
|
-
#include "array_of_doubles_union_impl.hpp"
|
|
80
|
-
|
|
81
|
-
#endif
|