datasketches 0.3.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/NOTICE +1 -1
- data/README.md +0 -2
- data/ext/datasketches/cpc_wrapper.cpp +2 -2
- data/ext/datasketches/kll_wrapper.cpp +0 -10
- data/lib/datasketches/version.rb +1 -1
- data/lib/datasketches.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/CODE_OF_CONDUCT.md +3 -0
- data/vendor/datasketches-cpp/CONTRIBUTING.md +50 -0
- data/vendor/datasketches-cpp/Doxyfile +2827 -0
- data/vendor/datasketches-cpp/LICENSE +0 -76
- data/vendor/datasketches-cpp/NOTICE +1 -1
- data/vendor/datasketches-cpp/README.md +1 -3
- data/vendor/datasketches-cpp/common/CMakeLists.txt +12 -11
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +11 -8
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +0 -2
- data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov.hpp +9 -6
- data/vendor/datasketches-cpp/common/include/optional.hpp +148 -0
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view.hpp +95 -2
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +1 -1
- data/vendor/datasketches-cpp/common/include/serde.hpp +69 -20
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/common/test/optional_test.cpp +85 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +14 -14
- data/vendor/datasketches-cpp/count/include/count_min.hpp +132 -78
- data/vendor/datasketches-cpp/count/include/count_min_impl.hpp +132 -152
- data/vendor/datasketches-cpp/count/test/CMakeLists.txt +11 -12
- data/vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp +61 -61
- data/vendor/datasketches-cpp/count/test/count_min_test.cpp +175 -178
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +14 -20
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +7 -4
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +17 -17
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +40 -40
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +13 -10
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +35 -11
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +8 -8
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -2
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +5 -5
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +20 -7
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_deserialize_from_java_test.cpp +60 -0
- data/vendor/datasketches-cpp/{python/include/py_object_lt.hpp → cpc/test/cpc_sketch_serialize_for_java.cpp} +15 -14
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +4 -29
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +4 -4
- data/vendor/datasketches-cpp/density/include/density_sketch.hpp +29 -9
- data/vendor/datasketches-cpp/density/include/density_sketch_impl.hpp +1 -1
- data/vendor/datasketches-cpp/density/test/CMakeLists.txt +0 -1
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +21 -9
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +6 -4
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_deserialize_from_java_test.cpp +95 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_serialize_for_java.cpp +83 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +3 -42
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +3 -1
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +3 -3
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +5 -3
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +4 -4
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +3 -1
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +0 -12
- data/vendor/datasketches-cpp/hll/include/hll.hpp +70 -57
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +0 -68
- data/vendor/datasketches-cpp/hll/test/hll_sketch_deserialize_from_java_test.cpp +69 -0
- data/vendor/datasketches-cpp/hll/test/hll_sketch_serialize_for_java.cpp +52 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +2 -2
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +71 -50
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +59 -130
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/kll/test/kll_sketch_deserialize_from_java_test.cpp +103 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_serialize_for_java.cpp +62 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +3 -38
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +68 -51
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +62 -132
- data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_deserialize_from_java_test.cpp +84 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_serialize_for_java.cpp +52 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +14 -38
- data/vendor/datasketches-cpp/req/include/req_common.hpp +7 -3
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +2 -2
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +97 -23
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +48 -109
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/req/test/req_sketch_deserialize_from_java_test.cpp +55 -0
- data/vendor/datasketches-cpp/{tuple/include/array_of_doubles_intersection_impl.hpp → req/test/req_sketch_serialize_for_java.cpp} +12 -7
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +3 -89
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +4 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sample.hpp +210 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sample_impl.hpp +535 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sketch.hpp +281 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sketch_impl.hpp +531 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +69 -26
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +3 -3
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +10 -11
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +4 -4
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +55 -8
- data/vendor/datasketches-cpp/sampling/test/ebpps_allocation_test.cpp +96 -0
- data/vendor/datasketches-cpp/sampling/test/ebpps_sample_test.cpp +137 -0
- data/vendor/datasketches-cpp/sampling/test/ebpps_sketch_test.cpp +266 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_deserialize_from_java_test.cpp +81 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_serialize_for_java.cpp +54 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +0 -37
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_deserialize_from_java_test.cpp +50 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_serialize_for_java.cpp +56 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -18
- data/vendor/datasketches-cpp/theta/include/bit_packing.hpp +2608 -2608
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_theta_sketched_sets.hpp +7 -6
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +20 -5
- data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +10 -4
- data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +13 -5
- data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +5 -5
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +3 -3
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity.hpp +2 -1
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +1 -0
- data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +126 -27
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +8 -8
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +17 -10
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +3 -3
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +5 -2
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +11 -1
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/theta/test/theta_sketch_deserialize_from_java_test.cpp +57 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_serialize_for_java.cpp +61 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +0 -188
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +8 -7
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +19 -144
- data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b.hpp → array_tuple_a_not_b.hpp} +24 -16
- data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b_impl.hpp → array_tuple_a_not_b_impl.hpp} +4 -4
- data/vendor/datasketches-cpp/tuple/include/array_tuple_intersection.hpp +65 -0
- data/vendor/datasketches-cpp/{python/include/py_object_ostream.hpp → tuple/include/array_tuple_intersection_impl.hpp} +7 -24
- data/vendor/datasketches-cpp/tuple/include/array_tuple_sketch.hpp +237 -0
- data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_sketch_impl.hpp → array_tuple_sketch_impl.hpp} +40 -41
- data/vendor/datasketches-cpp/tuple/include/array_tuple_union.hpp +81 -0
- data/vendor/datasketches-cpp/tuple/include/array_tuple_union_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +11 -2
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +17 -10
- data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +2 -1
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +95 -32
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +19 -11
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +16 -1
- data/vendor/datasketches-cpp/tuple/test/aod_sketch_deserialize_from_java_test.cpp +76 -0
- data/vendor/datasketches-cpp/tuple/test/aod_sketch_serialize_for_java.cpp +62 -0
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +5 -129
- data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +85 -89
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +3 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_deserialize_from_java_test.cpp +47 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_serialize_for_java.cpp +38 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +1 -1
- data/vendor/datasketches-cpp/version.cfg.in +1 -1
- metadata +47 -93
- data/vendor/datasketches-cpp/MANIFEST.in +0 -39
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/pyproject.toml +0 -23
- data/vendor/datasketches-cpp/python/CMakeLists.txt +0 -87
- data/vendor/datasketches-cpp/python/README.md +0 -85
- data/vendor/datasketches-cpp/python/datasketches/DensityWrapper.py +0 -87
- data/vendor/datasketches-cpp/python/datasketches/KernelFunction.py +0 -35
- data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +0 -110
- data/vendor/datasketches-cpp/python/datasketches/TuplePolicy.py +0 -77
- data/vendor/datasketches-cpp/python/datasketches/TupleWrapper.py +0 -205
- data/vendor/datasketches-cpp/python/datasketches/__init__.py +0 -38
- data/vendor/datasketches-cpp/python/include/kernel_function.hpp +0 -98
- data/vendor/datasketches-cpp/python/include/py_serde.hpp +0 -113
- data/vendor/datasketches-cpp/python/include/quantile_conditional.hpp +0 -104
- data/vendor/datasketches-cpp/python/include/tuple_policy.hpp +0 -136
- data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +0 -345
- data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +0 -354
- data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +0 -346
- data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +0 -463
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +0 -403
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +0 -21
- data/vendor/datasketches-cpp/python/src/__init__.py +0 -18
- data/vendor/datasketches-cpp/python/src/count_wrapper.cpp +0 -101
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +0 -76
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +0 -58
- data/vendor/datasketches-cpp/python/src/density_wrapper.cpp +0 -95
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +0 -182
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -126
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +0 -158
- data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +0 -68
- data/vendor/datasketches-cpp/python/src/py_serde.cpp +0 -112
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +0 -155
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +0 -154
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +0 -166
- data/vendor/datasketches-cpp/python/src/tuple_wrapper.cpp +0 -215
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +0 -490
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +0 -173
- data/vendor/datasketches-cpp/python/tests/__init__.py +0 -16
- data/vendor/datasketches-cpp/python/tests/count_min_test.py +0 -86
- data/vendor/datasketches-cpp/python/tests/cpc_test.py +0 -64
- data/vendor/datasketches-cpp/python/tests/density_test.py +0 -93
- data/vendor/datasketches-cpp/python/tests/fi_test.py +0 -149
- data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -129
- data/vendor/datasketches-cpp/python/tests/kll_test.py +0 -159
- data/vendor/datasketches-cpp/python/tests/quantiles_test.py +0 -160
- data/vendor/datasketches-cpp/python/tests/req_test.py +0 -159
- data/vendor/datasketches-cpp/python/tests/theta_test.py +0 -148
- data/vendor/datasketches-cpp/python/tests/tuple_test.py +0 -206
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +0 -148
- data/vendor/datasketches-cpp/python/tests/vo_test.py +0 -132
- data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +0 -67
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
- data/vendor/datasketches-cpp/setup.py +0 -110
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tox.ini +0 -26
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +0 -52
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +0 -81
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +0 -43
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +0 -1
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +0 -1
@@ -1,35 +0,0 @@
|
|
1
|
-
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
-
# or more contributor license agreements. See the NOTICE file
|
3
|
-
# distributed with this work for additional information
|
4
|
-
# regarding copyright ownership. The ASF licenses this file
|
5
|
-
# to you under the Apache License, Version 2.0 (the
|
6
|
-
# "License"); you may not use this file except in compliance
|
7
|
-
# with the License. You may obtain a copy of the License at
|
8
|
-
#
|
9
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
-
#
|
11
|
-
# Unless required by applicable law or agreed to in writing,
|
12
|
-
# software distributed under the License is distributed on an
|
13
|
-
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
-
# KIND, either express or implied. See the License for the
|
15
|
-
# specific language governing permissions and limitations
|
16
|
-
# under the License.
|
17
|
-
|
18
|
-
import numpy as np
|
19
|
-
|
20
|
-
from _datasketches import KernelFunction
|
21
|
-
|
22
|
-
# This file provides an example Python Kernel Function implementation.
|
23
|
-
#
|
24
|
-
# Each implementation must extend the KernelFunction class
|
25
|
-
# and define the __call__ method
|
26
|
-
|
27
|
-
# Implements a basic Gaussian Kernel
|
28
|
-
class GaussianKernel(KernelFunction):
|
29
|
-
def __init__(self, bandwidth: float=1.0):
|
30
|
-
KernelFunction.__init__(self)
|
31
|
-
self._bw = bandwidth
|
32
|
-
self._scale = -0.5 * (bandwidth ** -2)
|
33
|
-
|
34
|
-
def __call__(self, a: np.array, b: np.array) -> float:
|
35
|
-
return np.exp(self._scale * np.linalg.norm(a - b)**2)
|
@@ -1,110 +0,0 @@
|
|
1
|
-
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
-
# or more contributor license agreements. See the NOTICE file
|
3
|
-
# distributed with this work for additional information
|
4
|
-
# regarding copyright ownership. The ASF licenses this file
|
5
|
-
# to you under the Apache License, Version 2.0 (the
|
6
|
-
# "License"); you may not use this file except in compliance
|
7
|
-
# with the License. You may obtain a copy of the License at
|
8
|
-
#
|
9
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
-
#
|
11
|
-
# Unless required by applicable law or agreed to in writing,
|
12
|
-
# software distributed under the License is distributed on an
|
13
|
-
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
-
# KIND, either express or implied. See the License for the
|
15
|
-
# specific language governing permissions and limitations
|
16
|
-
# under the License.
|
17
|
-
|
18
|
-
from _datasketches import PyObjectSerDe
|
19
|
-
|
20
|
-
import struct
|
21
|
-
|
22
|
-
# This file provides several Python SerDe implementation examples.
|
23
|
-
#
|
24
|
-
# Each implementation must extend the PyObjectSerDe class and define
|
25
|
-
# three methods:
|
26
|
-
# * get_size(item) returns an int of the number of bytes needed to
|
27
|
-
# serialize the given item
|
28
|
-
# * to_bytes(item) returns a bytes object representing a serialized
|
29
|
-
# version of the given item
|
30
|
-
# * from_bytes(data, offset) takes a bytes object (data) and an offset
|
31
|
-
# indicating where in the data array to start reading. The method
|
32
|
-
# returns a tuple with the newly reconstructed object and the
|
33
|
-
# total number of bytes beyond the offset read from the input data.
|
34
|
-
|
35
|
-
# Implements a simple string-encoding scheme where a string is
|
36
|
-
# written as <num_bytes> <string_contents>, with no null termination.
|
37
|
-
# This format allows pre-allocating each string, at the cost of
|
38
|
-
# additional storage. Using this format, the serialized string consumes
|
39
|
-
# 4 + len(item) bytes.
|
40
|
-
class PyStringsSerDe(PyObjectSerDe):
|
41
|
-
def get_size(self, item):
|
42
|
-
return int(4 + len(item))
|
43
|
-
|
44
|
-
def to_bytes(self, item: str):
|
45
|
-
b = bytearray()
|
46
|
-
b.extend(len(item).to_bytes(4, 'little'))
|
47
|
-
b.extend(map(ord,item))
|
48
|
-
return bytes(b)
|
49
|
-
|
50
|
-
def from_bytes(self, data: bytes, offset: int):
|
51
|
-
num_chars = int.from_bytes(data[offset:offset+3], 'little')
|
52
|
-
if (num_chars < 0 or num_chars > offset + len(data)):
|
53
|
-
raise IndexError(f'num_chars read must be non-negative and not larger than the buffer. Found {num_chars}')
|
54
|
-
str = data[offset+4:offset+4+num_chars].decode()
|
55
|
-
return (str, 4+num_chars)
|
56
|
-
|
57
|
-
# Implements an integer encoding scheme where each integer is written
|
58
|
-
# as a 32-bit (4 byte) little-endian value.
|
59
|
-
class PyIntsSerDe(PyObjectSerDe):
|
60
|
-
def get_size(self, item):
|
61
|
-
return int(4)
|
62
|
-
|
63
|
-
def to_bytes(self, item):
|
64
|
-
return struct.pack('<i', item)
|
65
|
-
|
66
|
-
def from_bytes(self, data: bytes, offset: int):
|
67
|
-
val = struct.unpack_from('<i', data, offset)[0]
|
68
|
-
return (val, 4)
|
69
|
-
|
70
|
-
|
71
|
-
# Implements an integer encoding scheme where each integer is written
|
72
|
-
# as a 64-bit (8 byte) little-endian value.
|
73
|
-
class PyLongsSerDe(PyObjectSerDe):
|
74
|
-
def get_size(self, item):
|
75
|
-
return int(8)
|
76
|
-
|
77
|
-
def to_bytes(self, item):
|
78
|
-
return struct.pack('<l', item)
|
79
|
-
|
80
|
-
def from_bytes(self, data: bytes, offset: int):
|
81
|
-
val = struct.unpack_from('<l', data, offset)[0]
|
82
|
-
return (val, 8)
|
83
|
-
|
84
|
-
|
85
|
-
# Implements a floating point encoding scheme where each value is written
|
86
|
-
# as a 32-bit floating point value.
|
87
|
-
class PyFloatsSerDe(PyObjectSerDe):
|
88
|
-
def get_size(self, item):
|
89
|
-
return int(4)
|
90
|
-
|
91
|
-
def to_bytes(self, item):
|
92
|
-
return struct.pack('<f', item)
|
93
|
-
|
94
|
-
def from_bytes(self, data: bytes, offset: int):
|
95
|
-
val = struct.unpack_from('<f', data, offset)[0]
|
96
|
-
return (val, 4)
|
97
|
-
|
98
|
-
|
99
|
-
# Implements a floating point encoding scheme where each value is written
|
100
|
-
# as a 64-bit floating point value.
|
101
|
-
class PyDoublesSerDe(PyObjectSerDe):
|
102
|
-
def get_size(self, item):
|
103
|
-
return int(8)
|
104
|
-
|
105
|
-
def to_bytes(self, item):
|
106
|
-
return struct.pack('<d', item)
|
107
|
-
|
108
|
-
def from_bytes(self, data: bytes, offset: int):
|
109
|
-
val = struct.unpack_from('<d', data, offset)[0]
|
110
|
-
return (val, 8)
|
@@ -1,77 +0,0 @@
|
|
1
|
-
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
-
# or more contributor license agreements. See the NOTICE file
|
3
|
-
# distributed with this work for additional information
|
4
|
-
# regarding copyright ownership. The ASF licenses this file
|
5
|
-
# to you under the Apache License, Version 2.0 (the
|
6
|
-
# "License"); you may not use this file except in compliance
|
7
|
-
# with the License. You may obtain a copy of the License at
|
8
|
-
#
|
9
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
-
#
|
11
|
-
# Unless required by applicable law or agreed to in writing,
|
12
|
-
# software distributed under the License is distributed on an
|
13
|
-
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
-
# KIND, either express or implied. See the License for the
|
15
|
-
# specific language governing permissions and limitations
|
16
|
-
# under the License.
|
17
|
-
|
18
|
-
import sys
|
19
|
-
|
20
|
-
from _datasketches import TuplePolicy
|
21
|
-
|
22
|
-
# This file provides an example Python Tuple Policy implementation.
|
23
|
-
#
|
24
|
-
# Each implementation must extend the PyTuplePolicy class and define
|
25
|
-
# two methods:
|
26
|
-
# * create_summary() returns a new Summary object
|
27
|
-
# * update_summary(summary, update) applies the relevant policy to update the
|
28
|
-
# provided summary with the data in update.
|
29
|
-
# * __call__ may be similar to update_summary but allows a different
|
30
|
-
# implementation for set operations (union and intersection)
|
31
|
-
|
32
|
-
# Implements an accumulator summary policy, where new values are
|
33
|
-
# added to the existing value.
|
34
|
-
class AccumulatorPolicy(TuplePolicy):
|
35
|
-
def __init__(self):
|
36
|
-
TuplePolicy.__init__(self)
|
37
|
-
|
38
|
-
def create_summary(self) -> int:
|
39
|
-
return int(0)
|
40
|
-
|
41
|
-
def update_summary(self, summary: int, update: int) -> int:
|
42
|
-
summary += update
|
43
|
-
return summary
|
44
|
-
|
45
|
-
def __call__(self, summary: int, update: int) -> int:
|
46
|
-
summary += update
|
47
|
-
return summary
|
48
|
-
|
49
|
-
|
50
|
-
# Implements a MAX rule, where the largest integer value is always kept
|
51
|
-
class MaxIntPolicy(TuplePolicy):
|
52
|
-
def __init__(self):
|
53
|
-
TuplePolicy.__init__(self)
|
54
|
-
|
55
|
-
def create_summary(self) -> int:
|
56
|
-
return int(-sys.maxsize-1)
|
57
|
-
|
58
|
-
def update_summary(self, summary: int, update: int) -> int:
|
59
|
-
return max(summary, update)
|
60
|
-
|
61
|
-
def __call__(self, summary: int, update: int) -> int:
|
62
|
-
return max(summary, update)
|
63
|
-
|
64
|
-
|
65
|
-
# Implements a MIN rule, where the smallest integer value is always kept
|
66
|
-
class MinIntPolicy(TuplePolicy):
|
67
|
-
def __init__(self):
|
68
|
-
TuplePolicy.__init__(self)
|
69
|
-
|
70
|
-
def create_summary(self) -> int:
|
71
|
-
return int(sys.maxsize)
|
72
|
-
|
73
|
-
def update_summary(self, summary: int, update: int) -> int:
|
74
|
-
return min(summary, update)
|
75
|
-
|
76
|
-
def __call__(self, summary: int, update: int) -> int:
|
77
|
-
return min(summary, update)
|
@@ -1,205 +0,0 @@
|
|
1
|
-
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
-
# or more contributor license agreements. See the NOTICE file
|
3
|
-
# distributed with this work for additional information
|
4
|
-
# regarding copyright ownership. The ASF licenses this file
|
5
|
-
# to you under the Apache License, Version 2.0 (the
|
6
|
-
# "License"); you may not use this file except in compliance
|
7
|
-
# with the License. You may obtain a copy of the License at
|
8
|
-
#
|
9
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
-
#
|
11
|
-
# Unless required by applicable law or agreed to in writing,
|
12
|
-
# software distributed under the License is distributed on an
|
13
|
-
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
-
# KIND, either express or implied. See the License for the
|
15
|
-
# specific language governing permissions and limitations
|
16
|
-
# under the License.
|
17
|
-
|
18
|
-
from abc import ABC, abstractmethod
|
19
|
-
|
20
|
-
from _datasketches import _tuple_sketch, _compact_tuple_sketch, _update_tuple_sketch
|
21
|
-
from _datasketches import _tuple_union, _tuple_intersection
|
22
|
-
from _datasketches import _tuple_a_not_b, _tuple_jaccard_similarity
|
23
|
-
from _datasketches import PyObjectSerDe, theta_sketch, TuplePolicy
|
24
|
-
|
25
|
-
class tuple_sketch(ABC):
|
26
|
-
"""An abstract base class representing a Tuple Sketch."""
|
27
|
-
_gadget: _tuple_sketch
|
28
|
-
|
29
|
-
def __str__(self, print_items:bool=False):
|
30
|
-
return self._gadget.to_string(print_items)
|
31
|
-
|
32
|
-
def is_empty(self):
|
33
|
-
"""Returns True if the sketch is empty, otherwise False."""
|
34
|
-
return self._gadget.is_empty()
|
35
|
-
|
36
|
-
def get_estimate(self):
|
37
|
-
"""Returns an estimate of the distinct count of the input stream."""
|
38
|
-
return self._gadget.get_estimate()
|
39
|
-
|
40
|
-
def get_upper_bound(self, num_std_devs:int):
|
41
|
-
"""Returns an approximate upper bound on the estimate at standard deviations in {1, 2, 3}."""
|
42
|
-
return self._gadget.get_upper_bound(num_std_devs)
|
43
|
-
|
44
|
-
def get_lower_bound(self, num_std_devs:int):
|
45
|
-
"""Returns an approximate lower bound on the estimate at standard deviations in {1, 2, 3}."""
|
46
|
-
return self._gadget.get_lower_bound(num_std_devs)
|
47
|
-
|
48
|
-
def is_estimation_mode(self):
|
49
|
-
"""Returns True if the sketch is in estimation mode, otherwise False."""
|
50
|
-
return self._gadget.is_estimation_mode()
|
51
|
-
|
52
|
-
def get_theta(self):
|
53
|
-
"""Returns theta (the effective sampling rate) as a fraction from 0 to 1."""
|
54
|
-
return self._gadget.get_theta()
|
55
|
-
|
56
|
-
def get_theta64(self):
|
57
|
-
"""Returns theta as a 64-bit integer value."""
|
58
|
-
return self._gadget.get_theta64()
|
59
|
-
|
60
|
-
def get_num_retained(self):
|
61
|
-
"""Returns the number of items currently in the sketch."""
|
62
|
-
return self._gadget.get_num_retained()
|
63
|
-
|
64
|
-
def get_seed_hash(self):
|
65
|
-
"""Returns a hash of the seed used in the sketch."""
|
66
|
-
return self._gadget.get_seed_hash()
|
67
|
-
|
68
|
-
def is_ordered(self):
|
69
|
-
"""Returns True if the sketch entries are sorder, otherwise False."""
|
70
|
-
return self._gadget.is_ordered()
|
71
|
-
|
72
|
-
def __iter__(self):
|
73
|
-
return self._gadget.__iter__()
|
74
|
-
|
75
|
-
|
76
|
-
class compact_tuple_sketch(tuple_sketch):
|
77
|
-
"""An instance of a Tuple Sketch that has been compacted and can no longer accept updates."""
|
78
|
-
|
79
|
-
def __init__(self, other:tuple_sketch, ordered:bool = True):
|
80
|
-
if other == None:
|
81
|
-
self._gadget = None
|
82
|
-
else:
|
83
|
-
self._gadget = _compact_tuple_sketch(other, ordered)
|
84
|
-
|
85
|
-
def serialize(self, serde:PyObjectSerDe):
|
86
|
-
"""Serializes the sketch into a bytes object with the provided SerDe."""
|
87
|
-
return self._gadget.serialize(serde)
|
88
|
-
|
89
|
-
@classmethod
|
90
|
-
def from_theta_sketch(cls, sketch:theta_sketch, summary, seed:int=_tuple_sketch.DEFAULT_SEED):
|
91
|
-
"""Creates a comapct Tuple Sketch from a Theta Sketch using a fixed summary value."""
|
92
|
-
self = cls.__new__(cls)
|
93
|
-
self._gadget = _compact_tuple_sketch(sketch, summary, seed)
|
94
|
-
return self
|
95
|
-
|
96
|
-
@classmethod
|
97
|
-
def deserialize(cls, data:bytes, serde:PyObjectSerDe, seed:int=_tuple_sketch.DEFAULT_SEED):
|
98
|
-
"""Reads a bytes object and uses the provded SerDe to return the corresponding compact_tuple_sketch."""
|
99
|
-
self = cls.__new__(cls)
|
100
|
-
self._gadget = _compact_tuple_sketch.deserialize(data, serde, seed)
|
101
|
-
return self
|
102
|
-
|
103
|
-
|
104
|
-
class update_tuple_sketch(tuple_sketch):
|
105
|
-
"""An instance of a Tuple Sketch that is available for updates. Requires a Policy object to handle Summary values."""
|
106
|
-
|
107
|
-
def __init__(self, policy, lg_k:int = 12, p:float = 1.0, seed:int = _tuple_sketch.DEFAULT_SEED):
|
108
|
-
self._policy = policy
|
109
|
-
self._gadget = _update_tuple_sketch(self._policy, lg_k, p, seed)
|
110
|
-
|
111
|
-
def update(self, datum, value):
|
112
|
-
"""Updates the sketch with the provided item and summary value."""
|
113
|
-
self._gadget.update(datum, value)
|
114
|
-
|
115
|
-
def compact(self, ordered:bool = True) -> compact_tuple_sketch:
|
116
|
-
"""Returns a compacted form of the sketch, optionally sorting it."""
|
117
|
-
return self._gadget.compact(ordered)
|
118
|
-
|
119
|
-
def reset(self):
|
120
|
-
"""Resets the sketch to the initial empty state."""
|
121
|
-
self._gadget.reset()
|
122
|
-
|
123
|
-
|
124
|
-
class tuple_union:
|
125
|
-
"""An object that can merge Tuple Sketches. Requires a Policy object to handle merging Summaries."""
|
126
|
-
_policy: TuplePolicy
|
127
|
-
|
128
|
-
def __init__(self, policy:TuplePolicy, lg_k:int = 12, p:float = 1.0, seed:int = _tuple_sketch.DEFAULT_SEED):
|
129
|
-
self._policy = policy
|
130
|
-
self._gadget = _tuple_union(self._policy, lg_k, p, seed)
|
131
|
-
|
132
|
-
def update(self, sketch:tuple_sketch):
|
133
|
-
"""Updates the union with the given sketch."""
|
134
|
-
self._gadget.update(sketch._gadget)
|
135
|
-
|
136
|
-
def get_result(self, ordered:bool = True) -> compact_tuple_sketch:
|
137
|
-
"""Returns the sketch corresponding to the union result, optionally sorted."""
|
138
|
-
return compact_tuple_sketch(self._gadget.get_result(ordered), ordered)
|
139
|
-
|
140
|
-
def reset(self):
|
141
|
-
"""Resets the union to the initial empty state."""
|
142
|
-
self._gadget.reset()
|
143
|
-
|
144
|
-
|
145
|
-
class tuple_intersection:
|
146
|
-
"""An object that can intersect Tuple Sketches. Requires a Policy object to handle intersecting Summaries."""
|
147
|
-
_policy: TuplePolicy
|
148
|
-
|
149
|
-
def __init__(self, policy:TuplePolicy, seed:int = _tuple_sketch.DEFAULT_SEED):
|
150
|
-
self._policy = policy
|
151
|
-
self._gadget = _tuple_intersection(self._policy, seed)
|
152
|
-
|
153
|
-
def update(self, sketch:tuple_sketch):
|
154
|
-
"""Intersects the provided sketch with the current intersection state."""
|
155
|
-
self._gadget.update(sketch._gadget)
|
156
|
-
|
157
|
-
def has_result(self) -> bool:
|
158
|
-
"""Returns True if the intersection has a valid result, otherwise False."""
|
159
|
-
return self._gadget.has_result()
|
160
|
-
|
161
|
-
def get_result(self, ordered:bool = True) -> compact_tuple_sketch:
|
162
|
-
"""Returns the sketch corresponding to the intersection result, optionally sorted."""
|
163
|
-
return compact_tuple_sketch(self._gadget.get_result(ordered), ordered)
|
164
|
-
|
165
|
-
|
166
|
-
class tuple_a_not_b:
|
167
|
-
"""An object that can peform the A-not-B operation between two sketches."""
|
168
|
-
def __init__(self, seed:int = _tuple_sketch.DEFAULT_SEED):
|
169
|
-
self._gadget = _tuple_a_not_b(seed)
|
170
|
-
|
171
|
-
def compute(self, a:tuple_sketch, b:tuple_sketch, ordered:bool=True) -> compact_tuple_sketch:
|
172
|
-
"""Returns a sketch with the result of applying the A-not-B operation on the given inputs."""
|
173
|
-
return compact_tuple_sketch(self._gadget.compute(a._gadget, b._gadget))
|
174
|
-
|
175
|
-
|
176
|
-
class tuple_jaccard_similarity:
|
177
|
-
@staticmethod
|
178
|
-
def jaccard(a:tuple_sketch, b:tuple_sketch, seed:int=_tuple_sketch.DEFAULT_SEED):
|
179
|
-
"""Returns a list with {lower_bound, estimate, upper_bound} of the Jaccard similarity between sketches."""
|
180
|
-
return _tuple_jaccard_similarity.jaccard(a._gadget, b._gadget, seed)
|
181
|
-
|
182
|
-
@staticmethod
|
183
|
-
def exactly_equal(a:tuple_sketch, b:tuple_sketch, seed:int=_tuple_sketch.DEFAULT_SEED):
|
184
|
-
"""Returns True if sketch_a and sketch_b are equivalent, otherwise False."""
|
185
|
-
return _tuple_jaccard_similarity.exactly_equal(a._gadget, b._gadget, seed)
|
186
|
-
|
187
|
-
@staticmethod
|
188
|
-
def similarity_test(actual:tuple_sketch, expected:tuple_sketch, threshold:float, seed:int=_tuple_sketch.DEFAULT_SEED):
|
189
|
-
"""Tests similarity of an actual sketch against an expected sketch.
|
190
|
-
|
191
|
-
Computes the lower bound of the Jaccard index J_{LB} of the actual and expected sketches.
|
192
|
-
If J_{LB} >= threshold, then the sketches are considered to be similar sith a confidence of
|
193
|
-
97.7% and returns True, otherwise False.
|
194
|
-
"""
|
195
|
-
return _tuple_jaccard_similarity.similarity_test(actual._gadget, expected._gadget, threshold, seed)
|
196
|
-
|
197
|
-
@staticmethod
|
198
|
-
def dissimilarity_test(actual:tuple_sketch, expected:tuple_sketch, threshold:float, seed:int=_tuple_sketch.DEFAULT_SEED):
|
199
|
-
"""Tests dissimilarity of an actual sketch against an expected sketch.
|
200
|
-
|
201
|
-
Computes the upper bound of the Jaccard index J_{UB} of the actual and expected sketches.
|
202
|
-
If J_{UB} <= threshold, then the sketches are considered to be dissimilar sith a confidence of
|
203
|
-
97.7% and returns True, otherwise False.
|
204
|
-
"""
|
205
|
-
return _tuple_jaccard_similarity.dissimilarity_test(actual._gadget, expected._gadget, threshold, seed)
|
@@ -1,38 +0,0 @@
|
|
1
|
-
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
-
# or more contributor license agreements. See the NOTICE file
|
3
|
-
# distributed with this work for additional information
|
4
|
-
# regarding copyright ownership. The ASF licenses this file
|
5
|
-
# to you under the Apache License, Version 2.0 (the
|
6
|
-
# "License"); you may not use this file except in compliance
|
7
|
-
# with the License. You may obtain a copy of the License at
|
8
|
-
#
|
9
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
-
#
|
11
|
-
# Unless required by applicable law or agreed to in writing,
|
12
|
-
# software distributed under the License is distributed on an
|
13
|
-
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
-
# KIND, either express or implied. See the License for the
|
15
|
-
# specific language governing permissions and limitations
|
16
|
-
# under the License.
|
17
|
-
|
18
|
-
"""The Apache DataSketches Library for Python
|
19
|
-
|
20
|
-
Provided under the Apache License, Verison 2.0
|
21
|
-
<http://www.apache.org/licenses/LICENSE-2.0>
|
22
|
-
"""
|
23
|
-
|
24
|
-
name = 'datasketches'
|
25
|
-
|
26
|
-
from _datasketches import *
|
27
|
-
|
28
|
-
from .PySerDe import *
|
29
|
-
from .TuplePolicy import *
|
30
|
-
from .KernelFunction import *
|
31
|
-
|
32
|
-
# Wrappers around the pybind11 classes for cases where we
|
33
|
-
# need to define a python object that is persisted within
|
34
|
-
# the C++ object. Currently, the native python portion of
|
35
|
-
# a class derived from a C++ class may be garbage collected
|
36
|
-
# even though a pointer to the C++ portion remains valid.
|
37
|
-
from .TupleWrapper import *
|
38
|
-
from .DensityWrapper import *
|
@@ -1,98 +0,0 @@
|
|
1
|
-
/*
|
2
|
-
* Licensed to the Apache Software Foundation (ASF) under one
|
3
|
-
* or more contributor license agreements. See the NOTICE file
|
4
|
-
* distributed with this work for additional information
|
5
|
-
* regarding copyright ownership. The ASF licenses this file
|
6
|
-
* to you under the Apache License, Version 2.0 (the
|
7
|
-
* "License"); you may not use this file except in compliance
|
8
|
-
* with the License. You may obtain a copy of the License at
|
9
|
-
*
|
10
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
11
|
-
*
|
12
|
-
* Unless required by applicable law or agreed to in writing,
|
13
|
-
* software distributed under the License is distributed on an
|
14
|
-
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
15
|
-
* KIND, either express or implied. See the License for the
|
16
|
-
* specific language governing permissions and limitations
|
17
|
-
* under the License.
|
18
|
-
*/
|
19
|
-
|
20
|
-
//#include <memory>
|
21
|
-
#include <pybind11/pybind11.h>
|
22
|
-
#include <pybind11/numpy.h>
|
23
|
-
|
24
|
-
#ifndef _KERNEL_FUNCTION_HPP_
|
25
|
-
#define _KERNEL_FUNCTION_HPP_
|
26
|
-
|
27
|
-
namespace py = pybind11;
|
28
|
-
|
29
|
-
namespace datasketches {
|
30
|
-
|
31
|
-
/**
|
32
|
-
* @brief kernel_function provides the underlying base class from
|
33
|
-
* which native Python kernels ultimately inherit. The actual
|
34
|
-
* kernels implement KernelFunction, as shown in KernelFunction.py
|
35
|
-
*/
|
36
|
-
struct kernel_function {
|
37
|
-
virtual double operator()(py::array_t<double>& a, const py::array_t<double>& b) const = 0;
|
38
|
-
virtual ~kernel_function() = default;
|
39
|
-
};
|
40
|
-
|
41
|
-
/**
|
42
|
-
* @brief KernelFunction provides the "trampoline" class for pybind11
|
43
|
-
* that allows for a native Python implementation of kernel
|
44
|
-
* functions.
|
45
|
-
*/
|
46
|
-
struct KernelFunction : public kernel_function {
|
47
|
-
using kernel_function::kernel_function;
|
48
|
-
|
49
|
-
/**
|
50
|
-
* @brief Evaluates K(a,b), the kernel function for the given points a and b
|
51
|
-
* @param a the first vector
|
52
|
-
* @param b the second vector
|
53
|
-
* @return The function value K(a,b)
|
54
|
-
*/
|
55
|
-
double operator()(py::array_t<double>& a, const py::array_t<double>& b) const override {
|
56
|
-
PYBIND11_OVERRIDE_PURE_NAME(
|
57
|
-
double, // Return type
|
58
|
-
kernel_function, // Parent class
|
59
|
-
"__call__", // Name of function in python
|
60
|
-
operator(), // Name of function in C++
|
61
|
-
a, b // Arguemnts
|
62
|
-
);
|
63
|
-
}
|
64
|
-
};
|
65
|
-
|
66
|
-
/* The kernel_function_holder provides a concrete class that dispatches calls
|
67
|
-
* from the sketch to the kernel_function. This class is needed to provide a
|
68
|
-
* concrete object to produce a compiled library, but library users should
|
69
|
-
* never need to use this directly.
|
70
|
-
*/
|
71
|
-
struct kernel_function_holder {
|
72
|
-
explicit kernel_function_holder(std::shared_ptr<kernel_function> kernel) : _kernel(kernel) {}
|
73
|
-
kernel_function_holder(const kernel_function_holder& other) : _kernel(other._kernel) {}
|
74
|
-
kernel_function_holder(kernel_function_holder&& other) : _kernel(std::move(other._kernel)) {}
|
75
|
-
kernel_function_holder& operator=(const kernel_function_holder& other) { _kernel = other._kernel; return *this; }
|
76
|
-
kernel_function_holder& operator=(kernel_function_holder&& other) { std::swap(_kernel, other._kernel); return *this; }
|
77
|
-
|
78
|
-
double operator()(const std::vector<double>& a, const py::array_t<double>& b) const {
|
79
|
-
py::array_t<double> a_arr(a.size(), a.data(), dummy_array_owner);
|
80
|
-
return _kernel->operator()(a_arr, b);
|
81
|
-
}
|
82
|
-
|
83
|
-
double operator()(const std::vector<double>& a, const std::vector<double>& b) const {
|
84
|
-
py::array_t<double> a_arr(a.size(), a.data(), dummy_array_owner);
|
85
|
-
py::array_t<double> b_arr(b.size(), b.data(), dummy_array_owner);
|
86
|
-
return _kernel->operator()(a_arr, b_arr);
|
87
|
-
}
|
88
|
-
|
89
|
-
private:
|
90
|
-
// a dummy object to "own" arrays when translating from std::vector to avoid a copy:
|
91
|
-
// https://github.com/pybind/pybind11/issues/323#issuecomment-575717041
|
92
|
-
py::str dummy_array_owner;
|
93
|
-
std::shared_ptr<kernel_function> _kernel;
|
94
|
-
};
|
95
|
-
|
96
|
-
}
|
97
|
-
|
98
|
-
#endif // _KERNEL_FUNCTION_HPP_
|
@@ -1,113 +0,0 @@
|
|
1
|
-
/*
|
2
|
-
* Licensed to the Apache Software Foundation (ASF) under one
|
3
|
-
* or more contributor license agreements. See the NOTICE file
|
4
|
-
* distributed with this work for additional information
|
5
|
-
* regarding copyright ownership. The ASF licenses this file
|
6
|
-
* to you under the Apache License, Version 2.0 (the
|
7
|
-
* "License"); you may not use this file except in compliance
|
8
|
-
* with the License. You may obtain a copy of the License at
|
9
|
-
*
|
10
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
11
|
-
*
|
12
|
-
* Unless required by applicable law or agreed to in writing,
|
13
|
-
* software distributed under the License is distributed on an
|
14
|
-
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
15
|
-
* KIND, either express or implied. See the License for the
|
16
|
-
* specific language governing permissions and limitations
|
17
|
-
* under the License.
|
18
|
-
*/
|
19
|
-
|
20
|
-
#include <pybind11/pybind11.h>
|
21
|
-
#include <pybind11/functional.h>
|
22
|
-
#include <sstream>
|
23
|
-
|
24
|
-
#ifndef _PY_SERDE_HPP_
|
25
|
-
#define _PY_SERDE_HPP_
|
26
|
-
|
27
|
-
namespace py = pybind11;
|
28
|
-
|
29
|
-
namespace datasketches {
|
30
|
-
|
31
|
-
/**
|
32
|
-
* @brief The py_object_serde is an abstract class that implements the
|
33
|
-
* datasketches serde interface, and is used to allow custom Python
|
34
|
-
* serialization of items wrapped as generic py::object types. The actual
|
35
|
-
* Python implementation classes must extend the PyObjectSerDe class.
|
36
|
-
*/
|
37
|
-
struct py_object_serde {
|
38
|
-
/**
|
39
|
-
* @brief Get the serialized size of an object, in bytes
|
40
|
-
*
|
41
|
-
* @param item A provided item
|
42
|
-
* @return int64_t The serialized size of the item, in bytes
|
43
|
-
*/
|
44
|
-
virtual int64_t get_size(const py::object& item) const = 0;
|
45
|
-
|
46
|
-
/**
|
47
|
-
* @brief Serializes an item to a bytes object
|
48
|
-
*
|
49
|
-
* @param item A provided item
|
50
|
-
* @return The serialized image of the item as a Python bytes object
|
51
|
-
*/
|
52
|
-
virtual py::bytes to_bytes(const py::object& item) const = 0;
|
53
|
-
|
54
|
-
/**
|
55
|
-
* @brief Constructs an object from a serialized image, reading the
|
56
|
-
* incoming buffer starting at the specified offset.
|
57
|
-
*
|
58
|
-
* @param bytes A buffer containing items from a serialized sketch
|
59
|
-
* @param offset The starting offset into the bytes buffer
|
60
|
-
* @return A Python tuple of the reconstructed item and the total number of bytes read
|
61
|
-
*/
|
62
|
-
virtual py::tuple from_bytes(py::bytes& bytes, size_t offset) const = 0;
|
63
|
-
|
64
|
-
virtual ~py_object_serde() = default;
|
65
|
-
|
66
|
-
// these methods are required by the serde interface; see common/include/serde.hpp for
|
67
|
-
// default implementations for C++ std::string and numeric types.
|
68
|
-
size_t size_of_item(const py::object& item) const;
|
69
|
-
size_t serialize(void* ptr, size_t capacity, const py::object* items, unsigned num) const;
|
70
|
-
size_t deserialize(const void* ptr, size_t capacity, py::object* items, unsigned num) const;
|
71
|
-
};
|
72
|
-
|
73
|
-
/**
|
74
|
-
* @brief The PyObjectSerDe class provides a concrete base class
|
75
|
-
* that pybind11 uses as a "trampoline" to pass calls through to
|
76
|
-
* the abstract py_object_serde class. Custom Python serde implementations
|
77
|
-
* must extend this class.
|
78
|
-
*/
|
79
|
-
struct PyObjectSerDe : public py_object_serde {
|
80
|
-
using py_object_serde::py_object_serde;
|
81
|
-
|
82
|
-
// trampoline definitions -- need one for each virtual function
|
83
|
-
int64_t get_size(const py::object& item) const override {
|
84
|
-
PYBIND11_OVERRIDE_PURE(
|
85
|
-
int64_t, // Return type
|
86
|
-
py_object_serde, // Parent class
|
87
|
-
get_size, // Name of function in C++ (must match Python name)
|
88
|
-
item // Argument(s)
|
89
|
-
);
|
90
|
-
}
|
91
|
-
|
92
|
-
py::bytes to_bytes(const py::object& item) const override {
|
93
|
-
PYBIND11_OVERRIDE_PURE(
|
94
|
-
py::bytes, // Return type
|
95
|
-
py_object_serde, // Parent class
|
96
|
-
to_bytes, // Name of function in C++ (must match Python name)
|
97
|
-
item // Argument(s)
|
98
|
-
);
|
99
|
-
}
|
100
|
-
|
101
|
-
py::tuple from_bytes(py::bytes& bytes, size_t offset) const override {
|
102
|
-
PYBIND11_OVERRIDE_PURE(
|
103
|
-
py::tuple, // Return type
|
104
|
-
py_object_serde, // Parent class
|
105
|
-
from_bytes, // Name of function in C++ (must match Python name)
|
106
|
-
bytes, offset // Argument(s)
|
107
|
-
);
|
108
|
-
}
|
109
|
-
};
|
110
|
-
|
111
|
-
}
|
112
|
-
|
113
|
-
#endif // _PY_SERDE_HPP_
|