datasketches 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE +310 -0
- data/NOTICE +11 -0
- data/README.md +126 -0
- data/ext/datasketches/cpc_wrapper.cpp +50 -0
- data/ext/datasketches/ext.cpp +12 -0
- data/ext/datasketches/extconf.rb +11 -0
- data/ext/datasketches/hll_wrapper.cpp +69 -0
- data/lib/datasketches.rb +9 -0
- data/lib/datasketches/version.rb +3 -0
- data/vendor/datasketches-cpp/CMakeLists.txt +126 -0
- data/vendor/datasketches-cpp/LICENSE +311 -0
- data/vendor/datasketches-cpp/MANIFEST.in +19 -0
- data/vendor/datasketches-cpp/NOTICE +11 -0
- data/vendor/datasketches-cpp/README.md +42 -0
- data/vendor/datasketches-cpp/common/CMakeLists.txt +45 -0
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +173 -0
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +458 -0
- data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +291 -0
- data/vendor/datasketches-cpp/common/include/ceiling_power_of_2.hpp +41 -0
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +51 -0
- data/vendor/datasketches-cpp/common/include/conditional_back_inserter.hpp +68 -0
- data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +70 -0
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +114 -0
- data/vendor/datasketches-cpp/common/include/inv_pow2_table.hpp +107 -0
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +57 -0
- data/vendor/datasketches-cpp/common/include/serde.hpp +196 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +38 -0
- data/vendor/datasketches-cpp/common/test/catch.hpp +17618 -0
- data/vendor/datasketches-cpp/common/test/catch_runner.cpp +7 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.cpp +31 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +108 -0
- data/vendor/datasketches-cpp/common/test/test_runner.cpp +29 -0
- data/vendor/datasketches-cpp/common/test/test_type.hpp +137 -0
- data/vendor/datasketches-cpp/cpc/CMakeLists.txt +74 -0
- data/vendor/datasketches-cpp/cpc/include/compression_data.hpp +6022 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +62 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +147 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +742 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +167 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +311 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +810 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +102 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +346 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +137 -0
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +274 -0
- data/vendor/datasketches-cpp/cpc/include/kxp_byte_lookup.hpp +81 -0
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +84 -0
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +266 -0
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +67 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +381 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +149 -0
- data/vendor/datasketches-cpp/fi/CMakeLists.txt +54 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +319 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +484 -0
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +114 -0
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +345 -0
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +84 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +360 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +47 -0
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +92 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +303 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +83 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +811 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +40 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +291 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +59 -0
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +417 -0
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +91 -0
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +233 -0
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +43 -0
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +90 -0
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +48 -0
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +335 -0
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +69 -0
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +124 -0
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +55 -0
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +158 -0
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +56 -0
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +706 -0
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +136 -0
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +462 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +149 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +85 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +170 -0
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +287 -0
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +239 -0
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables-internal.hpp +112 -0
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +46 -0
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +56 -0
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +43 -0
- data/vendor/datasketches-cpp/hll/include/hll.hpp +669 -0
- data/vendor/datasketches-cpp/hll/include/hll.private.hpp +32 -0
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +79 -0
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +51 -0
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +130 -0
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +181 -0
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +93 -0
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +191 -0
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +389 -0
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +313 -0
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +141 -0
- data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +44 -0
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +168 -0
- data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/CMakeLists.txt +58 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +150 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +319 -0
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +67 -0
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +169 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +559 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +1131 -0
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +154 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +685 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +229 -0
- data/vendor/datasketches-cpp/pyproject.toml +17 -0
- data/vendor/datasketches-cpp/python/CMakeLists.txt +61 -0
- data/vendor/datasketches-cpp/python/README.md +78 -0
- data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +345 -0
- data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +354 -0
- data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +346 -0
- data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +463 -0
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +396 -0
- data/vendor/datasketches-cpp/python/src/__init__.py +2 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +90 -0
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +40 -0
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +123 -0
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +136 -0
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +209 -0
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +162 -0
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +488 -0
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +140 -0
- data/vendor/datasketches-cpp/python/tests/__init__.py +0 -0
- data/vendor/datasketches-cpp/python/tests/cpc_test.py +64 -0
- data/vendor/datasketches-cpp/python/tests/fi_test.py +110 -0
- data/vendor/datasketches-cpp/python/tests/hll_test.py +131 -0
- data/vendor/datasketches-cpp/python/tests/kll_test.py +119 -0
- data/vendor/datasketches-cpp/python/tests/theta_test.py +121 -0
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +148 -0
- data/vendor/datasketches-cpp/python/tests/vo_test.py +101 -0
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +48 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +392 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +1752 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +239 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +645 -0
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +43 -0
- data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +67 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +509 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +358 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
- data/vendor/datasketches-cpp/setup.py +94 -0
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +57 -0
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +73 -0
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +83 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +88 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +130 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +533 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +939 -0
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +122 -0
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +109 -0
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +45 -0
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +244 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +218 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +438 -0
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +97 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +104 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp +52 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp +32 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +52 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection_impl.hpp +31 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +179 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +238 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +81 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_sampled_sets.hpp +135 -0
- data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_theta_sketched_sets.hpp +135 -0
- data/vendor/datasketches-cpp/tuple/include/jaccard_similarity.hpp +172 -0
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +53 -0
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental_impl.hpp +33 -0
- data/vendor/datasketches-cpp/tuple/include/theta_comparators.hpp +48 -0
- data/vendor/datasketches-cpp/tuple/include/theta_constants.hpp +34 -0
- data/vendor/datasketches-cpp/tuple/include/theta_helpers.hpp +54 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_base.hpp +59 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_base_impl.hpp +121 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +78 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base.hpp +54 -0
- data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base_impl.hpp +80 -0
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +393 -0
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +481 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_base.hpp +60 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_base_impl.hpp +84 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +88 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +47 -0
- data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base.hpp +259 -0
- data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base_impl.hpp +389 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +57 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b_impl.hpp +33 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +104 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +496 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +587 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +109 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +47 -0
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +53 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +1 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +1 -0
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +298 -0
- data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +250 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +224 -0
- data/vendor/datasketches-cpp/tuple/test/theta_jaccard_similarity_test.cpp +144 -0
- data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +247 -0
- data/vendor/datasketches-cpp/tuple/test/theta_union_experimental_test.cpp +44 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +289 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +235 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +98 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +102 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +249 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +187 -0
- metadata +302 -0
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
|
3
|
+
# distributed with this work for additional information
|
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
|
6
|
+
# "License"); you may not use this file except in compliance
|
|
7
|
+
# with the License. You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
|
12
|
+
# software distributed under the License is distributed on an
|
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
+
# KIND, either express or implied. See the License for the
|
|
15
|
+
# specific language governing permissions and limitations
|
|
16
|
+
# under the License.
|
|
17
|
+
|
|
18
|
+
add_executable(kll_test)
|
|
19
|
+
|
|
20
|
+
target_link_libraries(kll_test kll common_test)
|
|
21
|
+
|
|
22
|
+
set_target_properties(kll_test PROPERTIES
|
|
23
|
+
CXX_STANDARD 11
|
|
24
|
+
CXX_STANDARD_REQUIRED YES
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
file(TO_CMAKE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" KLL_TEST_BINARY_PATH)
|
|
28
|
+
string(APPEND KLL_TEST_BINARY_PATH "/")
|
|
29
|
+
target_compile_definitions(kll_test
|
|
30
|
+
PRIVATE
|
|
31
|
+
TEST_BINARY_INPUT_PATH="${KLL_TEST_BINARY_PATH}"
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
add_test(
|
|
35
|
+
NAME kll_test
|
|
36
|
+
COMMAND kll_test
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
target_sources(kll_test
|
|
40
|
+
PRIVATE
|
|
41
|
+
kll_sketch_test.cpp
|
|
42
|
+
kll_sketch_custom_type_test.cpp
|
|
43
|
+
kll_sketch_validation.cpp
|
|
44
|
+
)
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#include <catch.hpp>
|
|
21
|
+
#include <sstream>
|
|
22
|
+
|
|
23
|
+
#include <kll_sketch.hpp>
|
|
24
|
+
#include <test_allocator.hpp>
|
|
25
|
+
#include <test_type.hpp>
|
|
26
|
+
|
|
27
|
+
namespace datasketches {
|
|
28
|
+
|
|
29
|
+
typedef kll_sketch<test_type, test_type_less, test_type_serde, test_allocator<test_type>> kll_test_type_sketch;
|
|
30
|
+
|
|
31
|
+
TEST_CASE("kll sketch custom type", "[kll_sketch]") {
|
|
32
|
+
|
|
33
|
+
// setup section
|
|
34
|
+
test_allocator_total_bytes = 0;
|
|
35
|
+
|
|
36
|
+
SECTION("compact level zero") {
|
|
37
|
+
kll_test_type_sketch sketch(8);
|
|
38
|
+
REQUIRE_THROWS_AS(sketch.get_quantile(0), std::runtime_error);
|
|
39
|
+
REQUIRE_THROWS_AS(sketch.get_min_value(), std::runtime_error);
|
|
40
|
+
REQUIRE_THROWS_AS(sketch.get_max_value(), std::runtime_error);
|
|
41
|
+
REQUIRE(sketch.get_serialized_size_bytes() == 8);
|
|
42
|
+
|
|
43
|
+
sketch.update(1);
|
|
44
|
+
sketch.update(2);
|
|
45
|
+
sketch.update(3);
|
|
46
|
+
sketch.update(4);
|
|
47
|
+
sketch.update(5);
|
|
48
|
+
sketch.update(6);
|
|
49
|
+
sketch.update(7);
|
|
50
|
+
sketch.update(8);
|
|
51
|
+
sketch.update(9);
|
|
52
|
+
|
|
53
|
+
//sketch.to_stream(std::cout);
|
|
54
|
+
|
|
55
|
+
REQUIRE(sketch.is_estimation_mode());
|
|
56
|
+
REQUIRE(sketch.get_n() > sketch.get_num_retained());
|
|
57
|
+
REQUIRE(sketch.get_min_value().get_value() == 1);
|
|
58
|
+
REQUIRE(sketch.get_max_value().get_value() == 9);
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
SECTION("merge small") {
|
|
62
|
+
kll_test_type_sketch sketch1(8);
|
|
63
|
+
sketch1.update(1);
|
|
64
|
+
|
|
65
|
+
kll_test_type_sketch sketch2(8);
|
|
66
|
+
sketch2.update(2);
|
|
67
|
+
|
|
68
|
+
sketch2.merge(sketch1);
|
|
69
|
+
|
|
70
|
+
//sketch2.to_stream(std::cout);
|
|
71
|
+
|
|
72
|
+
REQUIRE_FALSE(sketch2.is_estimation_mode());
|
|
73
|
+
REQUIRE(sketch2.get_num_retained() == sketch2.get_n());
|
|
74
|
+
REQUIRE(sketch2.get_min_value().get_value() == 1);
|
|
75
|
+
REQUIRE(sketch2.get_max_value().get_value() == 2);
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
SECTION("merge higher levels") {
|
|
79
|
+
kll_test_type_sketch sketch1(8);
|
|
80
|
+
sketch1.update(1);
|
|
81
|
+
sketch1.update(2);
|
|
82
|
+
sketch1.update(3);
|
|
83
|
+
sketch1.update(4);
|
|
84
|
+
sketch1.update(5);
|
|
85
|
+
sketch1.update(6);
|
|
86
|
+
sketch1.update(7);
|
|
87
|
+
sketch1.update(8);
|
|
88
|
+
sketch1.update(9);
|
|
89
|
+
|
|
90
|
+
kll_test_type_sketch sketch2(8);
|
|
91
|
+
sketch2.update(10);
|
|
92
|
+
sketch2.update(11);
|
|
93
|
+
sketch2.update(12);
|
|
94
|
+
sketch2.update(13);
|
|
95
|
+
sketch2.update(14);
|
|
96
|
+
sketch2.update(15);
|
|
97
|
+
sketch2.update(16);
|
|
98
|
+
sketch2.update(17);
|
|
99
|
+
sketch2.update(18);
|
|
100
|
+
|
|
101
|
+
sketch2.merge(sketch1);
|
|
102
|
+
|
|
103
|
+
//sketch2.to_stream(std::cout);
|
|
104
|
+
|
|
105
|
+
REQUIRE(sketch2.is_estimation_mode());
|
|
106
|
+
REQUIRE(sketch2.get_n() > sketch2.get_num_retained());
|
|
107
|
+
REQUIRE(sketch2.get_min_value().get_value() == 1);
|
|
108
|
+
REQUIRE(sketch2.get_max_value().get_value() == 18);
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
SECTION("serialize deserialize") {
|
|
112
|
+
kll_test_type_sketch sketch1;
|
|
113
|
+
|
|
114
|
+
const int n = 1000;
|
|
115
|
+
for (int i = 0; i < n; i++) sketch1.update(i);
|
|
116
|
+
|
|
117
|
+
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
|
118
|
+
sketch1.serialize(s);
|
|
119
|
+
REQUIRE((size_t) s.tellp() == sketch1.get_serialized_size_bytes());
|
|
120
|
+
auto sketch2 = kll_test_type_sketch::deserialize(s);
|
|
121
|
+
REQUIRE((size_t) s.tellg() == sketch2.get_serialized_size_bytes());
|
|
122
|
+
REQUIRE(s.tellg() == s.tellp());
|
|
123
|
+
REQUIRE(sketch2.is_empty() == sketch1.is_empty());
|
|
124
|
+
REQUIRE(sketch2.is_estimation_mode() == sketch1.is_estimation_mode());
|
|
125
|
+
REQUIRE(sketch2.get_n() == sketch1.get_n());
|
|
126
|
+
REQUIRE(sketch2.get_num_retained() == sketch1.get_num_retained());
|
|
127
|
+
REQUIRE(sketch2.get_min_value().get_value() == sketch1.get_min_value().get_value());
|
|
128
|
+
REQUIRE(sketch2.get_max_value().get_value() == sketch1.get_max_value().get_value());
|
|
129
|
+
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch1.get_normalized_rank_error(false));
|
|
130
|
+
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch1.get_normalized_rank_error(true));
|
|
131
|
+
REQUIRE(sketch2.get_quantile(0.5).get_value() == sketch1.get_quantile(0.5).get_value());
|
|
132
|
+
REQUIRE(sketch2.get_rank(0) == sketch1.get_rank(0));
|
|
133
|
+
REQUIRE(sketch2.get_rank(n) == sketch1.get_rank(n));
|
|
134
|
+
REQUIRE(sketch2.get_rank(n / 2) == sketch1.get_rank(n / 2));
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
SECTION("moving merge") {
|
|
138
|
+
kll_test_type_sketch sketch1(8);
|
|
139
|
+
for (int i = 0; i < 10; i++) sketch1.update(i);
|
|
140
|
+
kll_test_type_sketch sketch2(8);
|
|
141
|
+
sketch2.update(10);
|
|
142
|
+
sketch2.merge(std::move(sketch1));
|
|
143
|
+
REQUIRE(sketch2.get_min_value().get_value() == 0);
|
|
144
|
+
REQUIRE(sketch2.get_max_value().get_value() == 10);
|
|
145
|
+
REQUIRE(sketch2.get_n() == 11);
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
// cleanup
|
|
149
|
+
if (test_allocator_total_bytes != 0) {
|
|
150
|
+
REQUIRE(test_allocator_total_bytes == 0);
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
} /* namespace datasketches */
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,685 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#include <catch.hpp>
|
|
21
|
+
#include <cmath>
|
|
22
|
+
#include <cstring>
|
|
23
|
+
#include <sstream>
|
|
24
|
+
#include <fstream>
|
|
25
|
+
|
|
26
|
+
#include <kll_sketch.hpp>
|
|
27
|
+
#include <test_allocator.hpp>
|
|
28
|
+
|
|
29
|
+
namespace datasketches {
|
|
30
|
+
|
|
31
|
+
static const double RANK_EPS_FOR_K_200 = 0.0133;
|
|
32
|
+
static const double NUMERIC_NOISE_TOLERANCE = 1E-6;
|
|
33
|
+
|
|
34
|
+
#ifdef TEST_BINARY_INPUT_PATH
|
|
35
|
+
static std::string testBinaryInputPath = TEST_BINARY_INPUT_PATH;
|
|
36
|
+
#else
|
|
37
|
+
static std::string testBinaryInputPath = "test/";
|
|
38
|
+
#endif
|
|
39
|
+
|
|
40
|
+
// typical usage would be just kll_sketch<float> or kll_sketch<std::string>, but here we use test_allocator
|
|
41
|
+
typedef kll_sketch<float, std::less<float>, serde<float>, test_allocator<float>> kll_float_sketch;
|
|
42
|
+
// std::string itself keeps its default allocator for simplicity (only the sketch is parameterized with test_allocator), otherwise we need to define "less" and "serde"
|
|
43
|
+
typedef kll_sketch<std::string, std::less<std::string>, serde<std::string>, test_allocator<std::string>> kll_string_sketch;
|
|
44
|
+
|
|
45
|
+
TEST_CASE("kll sketch", "[kll_sketch]") {
|
|
46
|
+
|
|
47
|
+
// setup
|
|
48
|
+
test_allocator_total_bytes = 0;
|
|
49
|
+
|
|
50
|
+
SECTION("k limits") {
|
|
51
|
+
kll_float_sketch sketch1(kll_float_sketch::MIN_K); // this should work
|
|
52
|
+
kll_float_sketch sketch2(kll_float_sketch::MAX_K); // this should work
|
|
53
|
+
REQUIRE_THROWS_AS(new kll_float_sketch(kll_float_sketch::MIN_K - 1), std::invalid_argument);
|
|
54
|
+
// MAX_K + 1 makes no sense because k is uint16_t
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
SECTION("empty") {
|
|
58
|
+
kll_float_sketch sketch;
|
|
59
|
+
REQUIRE(sketch.is_empty());
|
|
60
|
+
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
61
|
+
REQUIRE(sketch.get_n() == 0);
|
|
62
|
+
REQUIRE(sketch.get_num_retained() == 0);
|
|
63
|
+
REQUIRE(std::isnan(sketch.get_rank(0)));
|
|
64
|
+
REQUIRE(std::isnan(sketch.get_min_value()));
|
|
65
|
+
REQUIRE(std::isnan(sketch.get_max_value()));
|
|
66
|
+
REQUIRE(std::isnan(sketch.get_quantile(0.5)));
|
|
67
|
+
const double fractions[3] {0, 0.5, 1};
|
|
68
|
+
REQUIRE(sketch.get_quantiles(fractions, 3).size() == 0);
|
|
69
|
+
const float split_points[1] {0};
|
|
70
|
+
REQUIRE(sketch.get_PMF(split_points, 1).size() == 0);
|
|
71
|
+
REQUIRE(sketch.get_CDF(split_points, 1).size() == 0);
|
|
72
|
+
|
|
73
|
+
int count = 0;
|
|
74
|
+
for (auto& it: sketch) {
|
|
75
|
+
(void) it; // to suppress "unused" warning
|
|
76
|
+
++count;
|
|
77
|
+
}
|
|
78
|
+
REQUIRE(count == 0);
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
SECTION("get bad quantile") {
|
|
82
|
+
kll_float_sketch sketch;
|
|
83
|
+
sketch.update(0); // has to be non-empty to reach the check
|
|
84
|
+
REQUIRE_THROWS_AS(sketch.get_quantile(-1), std::invalid_argument);
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
SECTION("one item") {
|
|
88
|
+
kll_float_sketch sketch;
|
|
89
|
+
sketch.update(1);
|
|
90
|
+
REQUIRE_FALSE(sketch.is_empty());
|
|
91
|
+
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
92
|
+
REQUIRE(sketch.get_n() == 1);
|
|
93
|
+
REQUIRE(sketch.get_num_retained() == 1);
|
|
94
|
+
REQUIRE(sketch.get_rank(1) == 0.0);
|
|
95
|
+
REQUIRE(sketch.get_rank(2) == 1.0);
|
|
96
|
+
REQUIRE(sketch.get_min_value() == 1.0);
|
|
97
|
+
REQUIRE(sketch.get_max_value() == 1.0);
|
|
98
|
+
REQUIRE(sketch.get_quantile(0.5) == 1.0);
|
|
99
|
+
const double fractions[3] {0, 0.5, 1};
|
|
100
|
+
auto quantiles = sketch.get_quantiles(fractions, 3);
|
|
101
|
+
REQUIRE(quantiles.size() == 3);
|
|
102
|
+
REQUIRE(quantiles[0] == 1.0);
|
|
103
|
+
REQUIRE(quantiles[1] == 1.0);
|
|
104
|
+
REQUIRE(quantiles[2] == 1.0);
|
|
105
|
+
|
|
106
|
+
int count = 0;
|
|
107
|
+
for (auto& it: sketch) {
|
|
108
|
+
REQUIRE(it.second == 1);
|
|
109
|
+
++count;
|
|
110
|
+
}
|
|
111
|
+
REQUIRE(count == 1);
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
SECTION("NaN") {
|
|
115
|
+
kll_float_sketch sketch;
|
|
116
|
+
sketch.update(std::numeric_limits<float>::quiet_NaN());
|
|
117
|
+
REQUIRE(sketch.is_empty());
|
|
118
|
+
|
|
119
|
+
sketch.update(0.0);
|
|
120
|
+
sketch.update(std::numeric_limits<float>::quiet_NaN());
|
|
121
|
+
REQUIRE(sketch.get_n() == 1);
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
SECTION("many items, exact mode") {
  // Feeds the values 0..n-1 and then checks n, min/max, quantiles and ranks with exact
  // equality (the sketch is required not to be in estimation mode).
  kll_float_sketch sketch;
  const uint32_t n = 200;
  for (uint32_t value = 0; value < n; ++value) {
    sketch.update(value);
    REQUIRE(sketch.get_n() == value + 1);
  }
  REQUIRE_FALSE(sketch.is_empty());
  REQUIRE_FALSE(sketch.is_estimation_mode());
  REQUIRE(sketch.get_num_retained() == n);
  REQUIRE(sketch.get_min_value() == 0.0);
  REQUIRE(sketch.get_quantile(0) == 0.0);
  REQUIRE(sketch.get_max_value() == n - 1);
  REQUIRE(sketch.get_quantile(1) == n - 1);

  // quantiles requested through an explicit array of fractions
  const double fractions[3] = {0, 0.5, 1};
  auto quantiles = sketch.get_quantiles(fractions, 3);
  REQUIRE(quantiles.size() == 3);
  REQUIRE(quantiles[0] == 0.0);
  REQUIRE(quantiles[1] == n / 2);
  REQUIRE(quantiles[2] == n - 1);

  // the rank of value v is expected to be exactly v / n
  for (uint32_t value = 0; value < n; ++value) {
    const double expected_rank = static_cast<double>(value) / n;
    REQUIRE(sketch.get_rank(value) == expected_rank);
  }

  // the alternative method must produce the same result
  auto quantiles_by_count = sketch.get_quantiles(3);
  REQUIRE(quantiles_by_count.size() == 3);
  REQUIRE(quantiles[0] == quantiles_by_count[0]);
  REQUIRE(quantiles[1] == quantiles_by_count[1]);
  REQUIRE(quantiles[2] == quantiles_by_count[2]);
}
|
|
158
|
+
|
|
159
|
+
SECTION("10 items") {
  // Insert the values 1..10 in ascending order, then probe a few quantiles.
  kll_float_sketch sketch;
  for (int value = 1; value <= 10; ++value) {
    sketch.update(value);
  }
  REQUIRE(sketch.get_quantile(0) == 1.0);
  REQUIRE(sketch.get_quantile(0.5) == 6.0);
  REQUIRE(sketch.get_quantile(0.99) == 10.0);
  REQUIRE(sketch.get_quantile(1) == 10.0);
}
|
|
176
|
+
|
|
177
|
+
SECTION("100 items") {
  // Insert the values 0..99, then probe quantiles at the extremes, 1%, 50% and 99%.
  kll_float_sketch sketch;
  for (int value = 0; value < 100; ++value) {
    sketch.update(value);
  }
  REQUIRE(sketch.get_quantile(0) == 0);
  REQUIRE(sketch.get_quantile(0.01) == 1);
  REQUIRE(sketch.get_quantile(0.5) == 50);
  REQUIRE(sketch.get_quantile(0.99) == 99.0);
  REQUIRE(sketch.get_quantile(1) == 99.0);
}
|
|
186
|
+
|
|
187
|
+
SECTION("many items, estimation mode") {
  // Feeds 0..n-1 with n large enough that the sketch is required to be in estimation mode,
  // then checks ranks within RANK_EPS_FOR_K_200 and the consistency of the quantile APIs.
  kll_float_sketch sketch;
  const int n = 1000000;
  for (int value = 0; value < n; ++value) {
    sketch.update(value);
    REQUIRE(sketch.get_n() == static_cast<uint64_t>(value + 1));
  }
  REQUIRE_FALSE(sketch.is_empty());
  REQUIRE(sketch.is_estimation_mode());
  REQUIRE(sketch.get_min_value() == 0.0); // min value is exact
  REQUIRE(sketch.get_quantile(0) == 0.0); // min value is exact
  REQUIRE(sketch.get_max_value() == n - 1); // max value is exact
  REQUIRE(sketch.get_quantile(1) == n - 1); // max value is exact

  // test rank
  for (int value = 0; value < n; ++value) {
    const double expected_rank = static_cast<double>(value) / n;
    REQUIRE(sketch.get_rank(value) == Approx(expected_rank).margin(RANK_EPS_FOR_K_200));
  }

  // test quantiles at every 0.1 percentage point
  double fractions[1001];
  double reverse_fractions[1001]; // check that ordering does not matter
  for (int idx = 0; idx < 1001; ++idx) {
    fractions[idx] = static_cast<double>(idx) / 1000;
    reverse_fractions[1000 - idx] = fractions[idx];
  }
  auto quantiles = sketch.get_quantiles(fractions, 1001);
  auto reverse_quantiles = sketch.get_quantiles(reverse_fractions, 1001);
  float previous_quantile = 0;
  for (int idx = 0; idx < 1001; ++idx) {
    // expensive in a loop, just to check the equivalence here, not advised for real code
    const float single_quantile = sketch.get_quantile(fractions[idx]);
    REQUIRE(quantiles[idx] == single_quantile);
    REQUIRE(reverse_quantiles[1000 - idx] == single_quantile);
    // quantiles must be non-decreasing as the fraction grows
    REQUIRE(previous_quantile <= single_quantile);
    previous_quantile = single_quantile;
  }

  //std::cout << sketch.to_string();
}
|
|
228
|
+
|
|
229
|
+
SECTION("consistency between get_rank adn get_PMF/CDF") {
  // Checks two invariants over the split points 0..n-1:
  //   1. get_rank(v) is equal to the matching entry returned by get_CDF;
  //   2. the running sum of the get_PMF entries stays within NUMERIC_NOISE_TOLERANCE
  //      of the matching get_CDF entry.
  // NOTE: the "adn" typo in the section name is left untouched so that existing
  // section-name filters keep matching.
  kll_float_sketch sketch;
  const int n = 1000;
  float values[n];
  for (int i = 0; i < n; i++) {
    sketch.update(i);
    values[i] = i;
  }

  const auto ranks(sketch.get_CDF(values, n));
  const auto pmf(sketch.get_PMF(values, n));

  double subtotal_pmf(0);
  for (int i = 0; i < n; i++) {
    // The REQUIREs sit behind an explicit comparison so that the diagnostic line is
    // printed only for an offending index.
    if (sketch.get_rank(values[i]) != ranks[i]) {
      std::cerr << "checking rank vs CDF for value " << i << std::endl;
      REQUIRE(sketch.get_rank(values[i]) == ranks[i]);
    }
    subtotal_pmf += pmf[i];
    // std::abs guarantees the floating-point overload. An unqualified abs() may bind to
    // the C function int abs(int), which truncates the difference to an integer and
    // would silently skip this check for any discrepancy smaller than 1.
    if (std::abs(ranks[i] - subtotal_pmf) > NUMERIC_NOISE_TOLERANCE) {
      std::cerr << "CDF vs PMF for value " << i << std::endl;
      REQUIRE(ranks[i] == Approx(subtotal_pmf).margin(NUMERIC_NOISE_TOLERANCE));
    }
  }
}
|
|
254
|
+
|
|
255
|
+
SECTION("deserialize from java") {
  // Loads the binary file kll_sketch_from_java.sk (presumably written by the Java
  // implementation - confirm against the test data) and checks its summary values.
  std::ifstream input;
  // make open/read failures throw instead of being silently ignored
  input.exceptions(std::ios::failbit | std::ios::badbit);
  input.open(testBinaryInputPath + "kll_sketch_from_java.sk", std::ios::binary);

  auto loaded = kll_float_sketch::deserialize(input);
  REQUIRE_FALSE(loaded.is_empty());
  REQUIRE(loaded.is_estimation_mode());
  REQUIRE(loaded.get_n() == 1000000);
  REQUIRE(loaded.get_num_retained() == 614);
  REQUIRE(loaded.get_min_value() == 0.0);
  REQUIRE(loaded.get_max_value() == 999999.0);
}
|
|
267
|
+
|
|
268
|
+
SECTION("stream serialize deserialize empty") {
  // Round-trips an empty sketch through a binary stringstream and compares the
  // restored sketch with the source one.
  kll_float_sketch original;
  std::stringstream stream(std::ios::in | std::ios::out | std::ios::binary);
  original.serialize(stream);
  REQUIRE(static_cast<size_t>(stream.tellp()) == original.get_serialized_size_bytes());

  auto restored = kll_float_sketch::deserialize(stream);
  REQUIRE(static_cast<size_t>(stream.tellp()) == restored.get_serialized_size_bytes());
  REQUIRE(restored.is_empty() == original.is_empty());
  REQUIRE(restored.is_estimation_mode() == original.is_estimation_mode());
  REQUIRE(restored.get_n() == original.get_n());
  REQUIRE(restored.get_num_retained() == original.get_num_retained());
  // min and max of the restored empty float sketch are required to be NaN
  REQUIRE(std::isnan(restored.get_min_value()));
  REQUIRE(std::isnan(restored.get_max_value()));
  REQUIRE(restored.get_normalized_rank_error(false) == original.get_normalized_rank_error(false));
  REQUIRE(restored.get_normalized_rank_error(true) == original.get_normalized_rank_error(true));
}
|
|
284
|
+
|
|
285
|
+
SECTION("bytes serialize deserialize empty") {
  // Round-trips an empty sketch through an in-memory byte buffer and compares the
  // restored sketch with the source one.
  kll_float_sketch original;
  auto blob = original.serialize();
  auto restored = kll_float_sketch::deserialize(blob.data(), blob.size());
  REQUIRE(blob.size() == original.get_serialized_size_bytes());
  REQUIRE(restored.is_empty() == original.is_empty());
  REQUIRE(restored.is_estimation_mode() == original.is_estimation_mode());
  REQUIRE(restored.get_n() == original.get_n());
  REQUIRE(restored.get_num_retained() == original.get_num_retained());
  // min and max of the restored empty float sketch are required to be NaN
  REQUIRE(std::isnan(restored.get_min_value()));
  REQUIRE(std::isnan(restored.get_max_value()));
  REQUIRE(restored.get_normalized_rank_error(false) == original.get_normalized_rank_error(false));
  REQUIRE(restored.get_normalized_rank_error(true) == original.get_normalized_rank_error(true));
}
|
|
299
|
+
|
|
300
|
+
SECTION("serialize deserialize one item") {
  // Round-trips a sketch holding the single value 1 through a binary stringstream.
  kll_float_sketch original;
  original.update(1);

  std::stringstream stream(std::ios::in | std::ios::out | std::ios::binary);
  original.serialize(stream);
  REQUIRE(static_cast<size_t>(stream.tellp()) == original.get_serialized_size_bytes());

  auto restored = kll_float_sketch::deserialize(stream);
  REQUIRE(static_cast<size_t>(stream.tellp()) == restored.get_serialized_size_bytes());
  // deserialization must have consumed exactly what was written
  REQUIRE(stream.tellg() == stream.tellp());

  REQUIRE_FALSE(restored.is_empty());
  REQUIRE_FALSE(restored.is_estimation_mode());
  REQUIRE(restored.get_n() == 1);
  REQUIRE(restored.get_num_retained() == 1);
  REQUIRE(restored.get_min_value() == 1.0);
  REQUIRE(restored.get_max_value() == 1.0);
  REQUIRE(restored.get_quantile(0.5) == 1.0);
  REQUIRE(restored.get_rank(1) == 0.0);
  REQUIRE(restored.get_rank(2) == 1.0);
}
|
|
319
|
+
|
|
320
|
+
SECTION("deserialize one item v1") {
  // Loads kll_sketch_float_one_item_v1.sk (presumably an older "v1" serial layout -
  // confirm against the test data) and checks it holds the single value 1.
  std::ifstream input;
  // make open/read failures throw instead of being silently ignored
  input.exceptions(std::ios::failbit | std::ios::badbit);
  input.open(testBinaryInputPath + "kll_sketch_float_one_item_v1.sk", std::ios::binary);

  auto loaded = kll_float_sketch::deserialize(input);
  REQUIRE_FALSE(loaded.is_empty());
  REQUIRE_FALSE(loaded.is_estimation_mode());
  REQUIRE(loaded.get_n() == 1);
  REQUIRE(loaded.get_num_retained() == 1);
  REQUIRE(loaded.get_min_value() == 1.0);
  REQUIRE(loaded.get_max_value() == 1.0);
}
|
|
332
|
+
|
|
333
|
+
SECTION("stream serialize deserialize many floats") {
  // Round-trips a float sketch fed with 0..n-1 through a binary stringstream and
  // compares the restored sketch with the source one.
  kll_float_sketch original;
  const int n = 1000;
  for (int value = 0; value < n; ++value) {
    original.update(value);
  }

  std::stringstream stream(std::ios::in | std::ios::out | std::ios::binary);
  original.serialize(stream);
  REQUIRE(static_cast<size_t>(stream.tellp()) == original.get_serialized_size_bytes());

  auto restored = kll_float_sketch::deserialize(stream);
  REQUIRE(static_cast<size_t>(stream.tellp()) == restored.get_serialized_size_bytes());
  // deserialization must have consumed exactly what was written
  REQUIRE(stream.tellg() == stream.tellp());

  REQUIRE(restored.is_empty() == original.is_empty());
  REQUIRE(restored.is_estimation_mode() == original.is_estimation_mode());
  REQUIRE(restored.get_n() == original.get_n());
  REQUIRE(restored.get_num_retained() == original.get_num_retained());
  REQUIRE(restored.get_min_value() == original.get_min_value());
  REQUIRE(restored.get_max_value() == original.get_max_value());
  REQUIRE(restored.get_normalized_rank_error(false) == original.get_normalized_rank_error(false));
  REQUIRE(restored.get_normalized_rank_error(true) == original.get_normalized_rank_error(true));
  REQUIRE(restored.get_quantile(0.5) == original.get_quantile(0.5));
  REQUIRE(restored.get_rank(0) == original.get_rank(0));
  REQUIRE(restored.get_rank(n) == original.get_rank(n));
}
|
|
355
|
+
|
|
356
|
+
SECTION("bytes serialize deserialize many floats") {
  // Round-trips a float sketch fed with 0..n-1 through a byte buffer, then checks that
  // truncated buffers are rejected with std::out_of_range.
  kll_float_sketch original;
  const int n = 1000;
  for (int value = 0; value < n; ++value) {
    original.update(value);
  }

  auto blob = original.serialize();
  REQUIRE(blob.size() == original.get_serialized_size_bytes());

  auto restored = kll_float_sketch::deserialize(blob.data(), blob.size());
  REQUIRE(blob.size() == restored.get_serialized_size_bytes());
  REQUIRE(restored.is_empty() == original.is_empty());
  REQUIRE(restored.is_estimation_mode() == original.is_estimation_mode());
  REQUIRE(restored.get_n() == original.get_n());
  REQUIRE(restored.get_num_retained() == original.get_num_retained());
  REQUIRE(restored.get_min_value() == original.get_min_value());
  REQUIRE(restored.get_max_value() == original.get_max_value());
  REQUIRE(restored.get_normalized_rank_error(false) == original.get_normalized_rank_error(false));
  REQUIRE(restored.get_normalized_rank_error(true) == original.get_normalized_rank_error(true));
  REQUIRE(restored.get_quantile(0.5) == original.get_quantile(0.5));
  REQUIRE(restored.get_rank(0) == original.get_rank(0));
  REQUIRE(restored.get_rank(n) == original.get_rank(n));

  // truncated input: 7 bytes, 15 bytes, and one byte short of the full image
  // NOTE(review): these calls go through kll_sketch<int> although the bytes come from a
  // float sketch - possibly a copy-paste from the int section; confirm before changing.
  REQUIRE_THROWS_AS(kll_sketch<int>::deserialize(blob.data(), 7), std::out_of_range);
  REQUIRE_THROWS_AS(kll_sketch<int>::deserialize(blob.data(), 15), std::out_of_range);
  REQUIRE_THROWS_AS(kll_sketch<int>::deserialize(blob.data(), blob.size() - 1), std::out_of_range);
}
|
|
379
|
+
|
|
380
|
+
SECTION("bytes serialize deserialize many ints") {
  // Round-trips an int sketch fed with 0..n-1 through a byte buffer, then checks that
  // truncated buffers are rejected with std::out_of_range.
  kll_sketch<int> original;
  const int n = 1000;
  for (int value = 0; value < n; ++value) {
    original.update(value);
  }

  auto blob = original.serialize();
  REQUIRE(blob.size() == original.get_serialized_size_bytes());

  auto restored = kll_sketch<int>::deserialize(blob.data(), blob.size());
  REQUIRE(blob.size() == restored.get_serialized_size_bytes());
  REQUIRE(restored.is_empty() == original.is_empty());
  REQUIRE(restored.is_estimation_mode() == original.is_estimation_mode());
  REQUIRE(restored.get_n() == original.get_n());
  REQUIRE(restored.get_num_retained() == original.get_num_retained());
  REQUIRE(restored.get_min_value() == original.get_min_value());
  REQUIRE(restored.get_max_value() == original.get_max_value());
  REQUIRE(restored.get_normalized_rank_error(false) == original.get_normalized_rank_error(false));
  REQUIRE(restored.get_normalized_rank_error(true) == original.get_normalized_rank_error(true));
  REQUIRE(restored.get_quantile(0.5) == original.get_quantile(0.5));
  REQUIRE(restored.get_rank(0) == original.get_rank(0));
  REQUIRE(restored.get_rank(n) == original.get_rank(n));

  // truncated input: 7 bytes, 15 bytes, and one byte short of the full image
  REQUIRE_THROWS_AS(kll_sketch<int>::deserialize(blob.data(), 7), std::out_of_range);
  REQUIRE_THROWS_AS(kll_sketch<int>::deserialize(blob.data(), 15), std::out_of_range);
  REQUIRE_THROWS_AS(kll_sketch<int>::deserialize(blob.data(), blob.size() - 1), std::out_of_range);
}
|
|
403
|
+
|
|
404
|
+
SECTION("floor of log2 of fraction") {
  // zero numerator, denominator 1
  REQUIRE(kll_helper::floor_of_log2_of_fraction(0, 1) == 0);

  // expected results for numerators 1..8 over the denominator 2
  const int expected_over_two[8] = {0, 0, 0, 1, 1, 1, 1, 2};
  for (int numerator = 1; numerator <= 8; ++numerator) {
    REQUIRE(kll_helper::floor_of_log2_of_fraction(numerator, 2) == expected_over_two[numerator - 1]);
  }
}
|
|
415
|
+
|
|
416
|
+
SECTION("out of order split points, float") {
  // Descending split points must be rejected with std::invalid_argument.
  kll_float_sketch sketch;
  sketch.update(0); // has to be non-empty to reach the check
  float descending_points[2] = {1, 0};
  REQUIRE_THROWS_AS(sketch.get_CDF(descending_points, 2), std::invalid_argument);
}
|
|
422
|
+
|
|
423
|
+
SECTION("out of order split points, int") {
  // Descending split points must be rejected with std::invalid_argument.
  kll_sketch<int> sketch;
  sketch.update(0); // has to be non-empty to reach the check
  int descending_points[2] = {1, 0};
  REQUIRE_THROWS_AS(sketch.get_CDF(descending_points, 2), std::invalid_argument);
}
|
|
429
|
+
|
|
430
|
+
SECTION("NaN split point") {
  // A NaN split point must be rejected with std::invalid_argument.
  kll_float_sketch sketch;
  sketch.update(0); // has to be non-empty to reach the check
  float nan_points[1] = {std::numeric_limits<float>::quiet_NaN()};
  REQUIRE_THROWS_AS(sketch.get_CDF(nan_points, 1), std::invalid_argument);
}
|
|
436
|
+
|
|
437
|
+
SECTION("merge") {
  // lower receives 0..n-1 ascending, upper receives 2n-1..n descending; after the
  // merge the combined sketch must cover 0..2n-1 with a median close to n.
  kll_float_sketch lower;
  kll_float_sketch upper;
  const int n = 10000;
  for (int i = 0; i < n; ++i) {
    lower.update(i);
    upper.update(2 * n - i - 1);
  }

  REQUIRE(lower.get_min_value() == 0.0f);
  REQUIRE(lower.get_max_value() == n - 1);
  REQUIRE(upper.get_min_value() == n);
  REQUIRE(upper.get_max_value() == 2.0f * n - 1);

  lower.merge(upper);

  REQUIRE_FALSE(lower.is_empty());
  REQUIRE(lower.get_n() == 2 * n);
  REQUIRE(lower.get_min_value() == 0.0f);
  REQUIRE(lower.get_max_value() == 2.0f * n - 1);
  REQUIRE(lower.get_quantile(0.5) == Approx(n).margin(n * RANK_EPS_FOR_K_200));
}
|
|
459
|
+
|
|
460
|
+
SECTION("merge lower k") {
  // Same data layout as the plain merge test, but the two sketches are constructed
  // with different parameters (256 vs 128).
  kll_float_sketch larger_k(256);
  kll_float_sketch smaller_k(128);
  const int n = 10000;
  for (int i = 0; i < n; ++i) {
    larger_k.update(i);
    smaller_k.update(2 * n - i - 1);
  }

  REQUIRE(larger_k.get_min_value() == 0.0f);
  REQUIRE(larger_k.get_max_value() == n - 1);
  REQUIRE(smaller_k.get_min_value() == n);
  REQUIRE(smaller_k.get_max_value() == 2.0f * n - 1);

  // before the merge the sketch built with 256 must report the smaller rank error
  REQUIRE(larger_k.get_normalized_rank_error(false) < smaller_k.get_normalized_rank_error(false));
  REQUIRE(larger_k.get_normalized_rank_error(true) < smaller_k.get_normalized_rank_error(true));

  larger_k.merge(smaller_k);

  // the merge target must get "contaminated" by the lower K of the merged-in sketch
  REQUIRE(smaller_k.get_normalized_rank_error(false) == larger_k.get_normalized_rank_error(false));
  REQUIRE(smaller_k.get_normalized_rank_error(true) == larger_k.get_normalized_rank_error(true));

  REQUIRE_FALSE(larger_k.is_empty());
  REQUIRE(larger_k.get_n() == 2 * n);
  REQUIRE(larger_k.get_min_value() == 0.0f);
  REQUIRE(larger_k.get_max_value() == 2.0f * n - 1);
  REQUIRE(larger_k.get_quantile(0.5) == Approx(n).margin(n * RANK_EPS_FOR_K_200));
}
|
|
489
|
+
|
|
490
|
+
SECTION("merge exact mode, lower k") {
  kll_float_sketch target(256);
  kll_float_sketch other(128);
  const int n = 10000;
  for (int i = 0; i < n; ++i) {
    target.update(i);
  }

  // rank error should not be affected by a merge with an empty sketch with lower k
  const double rank_error_before_merge = target.get_normalized_rank_error(true);
  target.merge(other);
  REQUIRE(target.get_normalized_rank_error(true) == rank_error_before_merge);

  REQUIRE_FALSE(target.is_empty());
  REQUIRE(target.get_n() == n);
  REQUIRE(target.get_min_value() == 0.0f);
  REQUIRE(target.get_max_value() == n - 1);
  REQUIRE(target.get_quantile(0.5) == Approx(n / 2).margin(n / 2 * RANK_EPS_FOR_K_200));

  // rank error should not be affected by a merge with a sketch in exact mode with lower k
  other.update(0);
  target.merge(other);
  REQUIRE(target.get_normalized_rank_error(true) == rank_error_before_merge);
}
|
|
514
|
+
|
|
515
|
+
SECTION("merge min value from other") {
  // The merge target holds 2 and the merged-in sketch holds 1, so after the merge the
  // target's minimum must come from the other sketch while its maximum stays its own.
  kll_float_sketch holds_one;
  kll_float_sketch holds_two;
  holds_one.update(1);
  holds_two.update(2);

  holds_two.merge(holds_one);

  REQUIRE(holds_two.get_min_value() == 1.0f);
  REQUIRE(holds_two.get_max_value() == 2.0f);
}
|
|
524
|
+
|
|
525
|
+
SECTION("merge min and max values from other") {
  // Merging a populated sketch into an empty one must carry over both extremes.
  kll_float_sketch populated;
  for (int value = 0; value < 1000000; ++value) {
    populated.update(value);
  }

  kll_float_sketch target;
  target.merge(populated);

  REQUIRE(target.get_min_value() == 0.0f);
  REQUIRE(target.get_max_value() == 999999.0f);
}
|
|
533
|
+
|
|
534
|
+
SECTION("sketch of ints") {
  kll_sketch<int> original;
  // on an empty int sketch these queries must throw std::runtime_error
  REQUIRE_THROWS_AS(original.get_quantile(0), std::runtime_error);
  REQUIRE_THROWS_AS(original.get_min_value(), std::runtime_error);
  REQUIRE_THROWS_AS(original.get_max_value(), std::runtime_error);

  const int n = 1000;
  for (int value = 0; value < n; ++value) {
    original.update(value);
  }

  // round trip through a binary stringstream
  std::stringstream stream(std::ios::in | std::ios::out | std::ios::binary);
  original.serialize(stream);
  REQUIRE(static_cast<size_t>(stream.tellp()) == original.get_serialized_size_bytes());

  auto restored = kll_sketch<int>::deserialize(stream);
  REQUIRE(static_cast<size_t>(stream.tellp()) == restored.get_serialized_size_bytes());
  // deserialization must have consumed exactly what was written
  REQUIRE(stream.tellg() == stream.tellp());

  REQUIRE(restored.is_empty() == original.is_empty());
  REQUIRE(restored.is_estimation_mode() == original.is_estimation_mode());
  REQUIRE(restored.get_n() == original.get_n());
  REQUIRE(restored.get_num_retained() == original.get_num_retained());
  REQUIRE(restored.get_min_value() == original.get_min_value());
  REQUIRE(restored.get_max_value() == original.get_max_value());
  REQUIRE(restored.get_normalized_rank_error(false) == original.get_normalized_rank_error(false));
  REQUIRE(restored.get_normalized_rank_error(true) == original.get_normalized_rank_error(true));
  REQUIRE(restored.get_quantile(0.5) == original.get_quantile(0.5));
  REQUIRE(restored.get_rank(0) == original.get_rank(0));
  REQUIRE(restored.get_rank(n) == original.get_rank(n));
}
|
|
561
|
+
|
|
562
|
+
SECTION("sketch of strings stream") {
  kll_string_sketch original;
  // on an empty string sketch these queries must throw std::runtime_error
  REQUIRE_THROWS_AS(original.get_quantile(0), std::runtime_error);
  REQUIRE_THROWS_AS(original.get_min_value(), std::runtime_error);
  REQUIRE_THROWS_AS(original.get_max_value(), std::runtime_error);
  REQUIRE(original.get_serialized_size_bytes() == 8);

  const int n = 1000;
  for (int i = 0; i < n; ++i) {
    original.update(std::to_string(i));
  }

  // items are the decimal strings of 0..999; the expected extremes are "0" and "999"
  REQUIRE(original.get_min_value() == std::string("0"));
  REQUIRE(original.get_max_value() == std::string("999"));

  // round trip through a binary stringstream
  std::stringstream stream(std::ios::in | std::ios::out | std::ios::binary);
  original.serialize(stream);
  REQUIRE(static_cast<size_t>(stream.tellp()) == original.get_serialized_size_bytes());

  auto restored = kll_string_sketch::deserialize(stream);
  REQUIRE(static_cast<size_t>(stream.tellp()) == restored.get_serialized_size_bytes());
  // deserialization must have consumed exactly what was written
  REQUIRE(stream.tellg() == stream.tellp());

  REQUIRE(restored.is_empty() == original.is_empty());
  REQUIRE(restored.is_estimation_mode() == original.is_estimation_mode());
  REQUIRE(restored.get_n() == original.get_n());
  REQUIRE(restored.get_num_retained() == original.get_num_retained());
  REQUIRE(restored.get_min_value() == original.get_min_value());
  REQUIRE(restored.get_max_value() == original.get_max_value());
  REQUIRE(restored.get_normalized_rank_error(false) == original.get_normalized_rank_error(false));
  REQUIRE(restored.get_normalized_rank_error(true) == original.get_normalized_rank_error(true));
  REQUIRE(restored.get_quantile(0.5) == original.get_quantile(0.5));
  REQUIRE(restored.get_rank(std::to_string(0)) == original.get_rank(std::to_string(0)));
  REQUIRE(restored.get_rank(std::to_string(n)) == original.get_rank(std::to_string(n)));

  // to take a look using hexdump
  //std::ofstream os("kll-string.sk");
  //sketch1.serialize(os);

  // debug print
  //sketch1.to_stream(std::cout);
}
|
|
600
|
+
|
|
601
|
+
SECTION("sketch of strings bytes") {
  kll_string_sketch original;
  // on an empty string sketch these queries must throw std::runtime_error
  REQUIRE_THROWS_AS(original.get_quantile(0), std::runtime_error);
  REQUIRE_THROWS_AS(original.get_min_value(), std::runtime_error);
  REQUIRE_THROWS_AS(original.get_max_value(), std::runtime_error);
  REQUIRE(original.get_serialized_size_bytes() == 8);

  const int n = 1000;
  for (int i = 0; i < n; ++i) {
    original.update(std::to_string(i));
  }

  // items are the decimal strings of 0..999; the expected extremes are "0" and "999"
  REQUIRE(original.get_min_value() == std::string("0"));
  REQUIRE(original.get_max_value() == std::string("999"));

  // round trip through an in-memory byte buffer
  auto blob = original.serialize();
  REQUIRE(blob.size() == original.get_serialized_size_bytes());

  auto restored = kll_string_sketch::deserialize(blob.data(), blob.size());
  REQUIRE(blob.size() == restored.get_serialized_size_bytes());
  REQUIRE(restored.is_empty() == original.is_empty());
  REQUIRE(restored.is_estimation_mode() == original.is_estimation_mode());
  REQUIRE(restored.get_n() == original.get_n());
  REQUIRE(restored.get_num_retained() == original.get_num_retained());
  REQUIRE(restored.get_min_value() == original.get_min_value());
  REQUIRE(restored.get_max_value() == original.get_max_value());
  REQUIRE(restored.get_normalized_rank_error(false) == original.get_normalized_rank_error(false));
  REQUIRE(restored.get_normalized_rank_error(true) == original.get_normalized_rank_error(true));
  REQUIRE(restored.get_quantile(0.5) == original.get_quantile(0.5));
  REQUIRE(restored.get_rank(std::to_string(0)) == original.get_rank(std::to_string(0)));
  REQUIRE(restored.get_rank(std::to_string(n)) == original.get_rank(std::to_string(n)));
}
|
|
630
|
+
|
|
631
|
+
|
|
632
|
+
SECTION("sketch of strings, single item, bytes") {
  // Byte round trip of a string sketch holding only "a"; the reported serialized size
  // must match the buffer size both before and after deserialization.
  kll_string_sketch original;
  original.update("a");

  auto blob = original.serialize();
  REQUIRE(blob.size() == original.get_serialized_size_bytes());

  auto restored = kll_string_sketch::deserialize(blob.data(), blob.size());
  REQUIRE(blob.size() == restored.get_serialized_size_bytes());
}
|
|
640
|
+
|
|
641
|
+
SECTION("copy") {
  // Copies of a sketch must answer every rank query exactly like the source.
  kll_sketch<int> source;
  const int n = 1000;
  for (int value = 0; value < n; ++value) {
    source.update(value);
  }

  // copy constructor
  kll_sketch<int> copy_constructed(source);
  for (int value = 0; value < n; ++value) {
    REQUIRE(copy_constructed.get_rank(value) == source.get_rank(value));
  }

  // copy assignment
  kll_sketch<int> copy_assigned;
  copy_assigned = source;
  for (int value = 0; value < n; ++value) {
    REQUIRE(copy_assigned.get_rank(value) == source.get_rank(value));
  }
}
|
|
659
|
+
|
|
660
|
+
SECTION("move") {
  // After each move the destination must report rank v / n for every inserted value v.
  kll_sketch<int> source;
  const int n = 100;
  for (int value = 0; value < n; ++value) {
    source.update(value);
  }

  // move constructor
  kll_sketch<int> move_constructed(std::move(source));
  for (int value = 0; value < n; ++value) {
    REQUIRE(move_constructed.get_rank(value) == static_cast<double>(value) / n);
  }

  // move assignment
  kll_sketch<int> move_assigned;
  move_assigned = std::move(move_constructed);
  for (int value = 0; value < n; ++value) {
    REQUIRE(move_assigned.get_rank(value) == static_cast<double>(value) / n);
  }
}
|
|
678
|
+
|
|
679
|
+
// cleanup
|
|
680
|
+
if (test_allocator_total_bytes != 0) {
|
|
681
|
+
REQUIRE(test_allocator_total_bytes == 0);
|
|
682
|
+
}
|
|
683
|
+
}
|
|
684
|
+
|
|
685
|
+
} /* namespace datasketches */
|