datasketches 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE +310 -0
- data/NOTICE +11 -0
- data/README.md +126 -0
- data/ext/datasketches/cpc_wrapper.cpp +50 -0
- data/ext/datasketches/ext.cpp +12 -0
- data/ext/datasketches/extconf.rb +11 -0
- data/ext/datasketches/hll_wrapper.cpp +69 -0
- data/lib/datasketches.rb +9 -0
- data/lib/datasketches/version.rb +3 -0
- data/vendor/datasketches-cpp/CMakeLists.txt +126 -0
- data/vendor/datasketches-cpp/LICENSE +311 -0
- data/vendor/datasketches-cpp/MANIFEST.in +19 -0
- data/vendor/datasketches-cpp/NOTICE +11 -0
- data/vendor/datasketches-cpp/README.md +42 -0
- data/vendor/datasketches-cpp/common/CMakeLists.txt +45 -0
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +173 -0
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +458 -0
- data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +291 -0
- data/vendor/datasketches-cpp/common/include/ceiling_power_of_2.hpp +41 -0
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +51 -0
- data/vendor/datasketches-cpp/common/include/conditional_back_inserter.hpp +68 -0
- data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +70 -0
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +114 -0
- data/vendor/datasketches-cpp/common/include/inv_pow2_table.hpp +107 -0
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +57 -0
- data/vendor/datasketches-cpp/common/include/serde.hpp +196 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +38 -0
- data/vendor/datasketches-cpp/common/test/catch.hpp +17618 -0
- data/vendor/datasketches-cpp/common/test/catch_runner.cpp +7 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.cpp +31 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +108 -0
- data/vendor/datasketches-cpp/common/test/test_runner.cpp +29 -0
- data/vendor/datasketches-cpp/common/test/test_type.hpp +137 -0
- data/vendor/datasketches-cpp/cpc/CMakeLists.txt +74 -0
- data/vendor/datasketches-cpp/cpc/include/compression_data.hpp +6022 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +62 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +147 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +742 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +167 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +311 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +810 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +102 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +346 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +137 -0
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +274 -0
- data/vendor/datasketches-cpp/cpc/include/kxp_byte_lookup.hpp +81 -0
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +84 -0
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +266 -0
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +67 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +381 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +149 -0
- data/vendor/datasketches-cpp/fi/CMakeLists.txt +54 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +319 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +484 -0
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +114 -0
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +345 -0
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +84 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +360 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +47 -0
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +92 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +303 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +83 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +811 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +40 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +291 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +59 -0
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +417 -0
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +91 -0
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +233 -0
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +43 -0
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +90 -0
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +48 -0
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +335 -0
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +69 -0
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +124 -0
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +55 -0
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +158 -0
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +56 -0
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +706 -0
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +136 -0
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +462 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +149 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +85 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +170 -0
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +287 -0
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +239 -0
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables-internal.hpp +112 -0
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +46 -0
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +56 -0
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +43 -0
- data/vendor/datasketches-cpp/hll/include/hll.hpp +669 -0
- data/vendor/datasketches-cpp/hll/include/hll.private.hpp +32 -0
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +79 -0
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +51 -0
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +130 -0
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +181 -0
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +93 -0
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +191 -0
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +389 -0
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +313 -0
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +141 -0
- data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +44 -0
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +168 -0
- data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/CMakeLists.txt +58 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +150 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +319 -0
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +67 -0
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +169 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +559 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +1131 -0
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +154 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +685 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +229 -0
- data/vendor/datasketches-cpp/pyproject.toml +17 -0
- data/vendor/datasketches-cpp/python/CMakeLists.txt +61 -0
- data/vendor/datasketches-cpp/python/README.md +78 -0
- data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +345 -0
- data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +354 -0
- data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +346 -0
- data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +463 -0
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +396 -0
- data/vendor/datasketches-cpp/python/src/__init__.py +2 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +90 -0
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +40 -0
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +123 -0
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +136 -0
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +209 -0
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +162 -0
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +488 -0
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +140 -0
- data/vendor/datasketches-cpp/python/tests/__init__.py +0 -0
- data/vendor/datasketches-cpp/python/tests/cpc_test.py +64 -0
- data/vendor/datasketches-cpp/python/tests/fi_test.py +110 -0
- data/vendor/datasketches-cpp/python/tests/hll_test.py +131 -0
- data/vendor/datasketches-cpp/python/tests/kll_test.py +119 -0
- data/vendor/datasketches-cpp/python/tests/theta_test.py +121 -0
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +148 -0
- data/vendor/datasketches-cpp/python/tests/vo_test.py +101 -0
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +48 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +392 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +1752 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +239 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +645 -0
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +43 -0
- data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +67 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +509 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +358 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
- data/vendor/datasketches-cpp/setup.py +94 -0
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +57 -0
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +73 -0
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +83 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +88 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +130 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +533 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +939 -0
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +122 -0
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +109 -0
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +45 -0
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +244 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +218 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +438 -0
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +97 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +104 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp +52 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp +32 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +52 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection_impl.hpp +31 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +179 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +238 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +81 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_sampled_sets.hpp +135 -0
- data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_theta_sketched_sets.hpp +135 -0
- data/vendor/datasketches-cpp/tuple/include/jaccard_similarity.hpp +172 -0
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +53 -0
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental_impl.hpp +33 -0
- data/vendor/datasketches-cpp/tuple/include/theta_comparators.hpp +48 -0
- data/vendor/datasketches-cpp/tuple/include/theta_constants.hpp +34 -0
- data/vendor/datasketches-cpp/tuple/include/theta_helpers.hpp +54 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_base.hpp +59 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_base_impl.hpp +121 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +78 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base.hpp +54 -0
- data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base_impl.hpp +80 -0
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +393 -0
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +481 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_base.hpp +60 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_base_impl.hpp +84 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +88 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +47 -0
- data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base.hpp +259 -0
- data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base_impl.hpp +389 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +57 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b_impl.hpp +33 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +104 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +496 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +587 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +109 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +47 -0
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +53 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +1 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +1 -0
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +298 -0
- data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +250 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +224 -0
- data/vendor/datasketches-cpp/tuple/test/theta_jaccard_similarity_test.cpp +144 -0
- data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +247 -0
- data/vendor/datasketches-cpp/tuple/test/theta_union_experimental_test.cpp +44 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +289 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +235 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +98 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +102 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +249 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +187 -0
- metadata +302 -0
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
|
3
|
+
# distributed with this work for additional information
|
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
|
6
|
+
# "License"); you may not use this file except in compliance
|
|
7
|
+
# with the License. You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
|
12
|
+
# software distributed under the License is distributed on an
|
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
+
# KIND, either express or implied. See the License for the
|
|
15
|
+
# specific language governing permissions and limitations
|
|
16
|
+
# under the License.
|
|
17
|
+
|
|
18
|
+
# Unit-test executable for the CPC sketch. The target is declared empty and its
# translation units are attached immediately afterwards.
add_executable(cpc_test)

target_sources(cpc_test
  PRIVATE
    cpc_sketch_test.cpp
    cpc_union_test.cpp
    compression_test.cpp
)

target_link_libraries(cpc_test cpc common_test)

# Require C++11 for this target.
set_property(TARGET cpc_test PROPERTY CXX_STANDARD 11)
set_property(TARGET cpc_test PROPERTY CXX_STANDARD_REQUIRED YES)

# Disabled: would hand the test sources' directory to the tests as
# TEST_BINARY_INPUT_PATH.
#file(TO_CMAKE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" CPC_TEST_BINARY_PATH)
#string(APPEND CPC_TEST_BINARY_PATH "/")
#target_compile_definitions(cpc_test
#  PRIVATE
#    TEST_BINARY_INPUT_PATH="${CPC_TEST_BINARY_PATH}"
#)

# Register the executable with CTest under the same name.
add_test(NAME cpc_test COMMAND cpc_test)
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#include <catch.hpp>
|
|
21
|
+
#include <algorithm>
|
|
22
|
+
|
|
23
|
+
#include "cpc_compressor.hpp"
|
|
24
|
+
|
|
25
|
+
namespace datasketches {
|
|
26
|
+
|
|
27
|
+
typedef u32_table<std::allocator<void>> table;
|
|
28
|
+
|
|
29
|
+
TEST_CASE("cpc sketch: compress and decompress pairs", "[cpc_sketch]") {
|
|
30
|
+
const int N = 200;
|
|
31
|
+
const int MAXWORDS = 1000;
|
|
32
|
+
|
|
33
|
+
HashState twoHashes;
|
|
34
|
+
uint32_t pairArray[N];
|
|
35
|
+
uint32_t pairArray2[N];
|
|
36
|
+
uint64_t value = 35538947; // some arbitrary starting value
|
|
37
|
+
const uint64_t golden64 = 0x9e3779b97f4a7c13ULL; // the golden ratio
|
|
38
|
+
for (int i = 0; i < N; i++) {
|
|
39
|
+
MurmurHash3_x64_128(&value, sizeof(value), 0, twoHashes);
|
|
40
|
+
uint32_t rand = twoHashes.h1 & 0xffff;
|
|
41
|
+
pairArray[i] = rand;
|
|
42
|
+
value += golden64;
|
|
43
|
+
}
|
|
44
|
+
//table::knuth_shell_sort3(pairArray, 0, N - 1); // unsigned numerical sort
|
|
45
|
+
std::sort(pairArray, &pairArray[N]);
|
|
46
|
+
uint32_t prev = UINT32_MAX;
|
|
47
|
+
int nxt = 0;
|
|
48
|
+
for (int i = 0; i < N; i++) { // uniquify
|
|
49
|
+
if (pairArray[i] != prev) {
|
|
50
|
+
prev = pairArray[i];
|
|
51
|
+
pairArray[nxt++] = pairArray[i];
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
int numPairs = nxt;
|
|
55
|
+
|
|
56
|
+
uint32_t compressedWords[MAXWORDS];
|
|
57
|
+
|
|
58
|
+
for (size_t numBaseBits = 0; numBaseBits <= 11; numBaseBits++) {
|
|
59
|
+
size_t numWordsWritten = get_compressor<std::allocator<void>>().low_level_compress_pairs(pairArray, numPairs, numBaseBits, compressedWords);
|
|
60
|
+
get_compressor<std::allocator<void>>().low_level_uncompress_pairs(pairArray2, numPairs, numBaseBits, compressedWords, numWordsWritten);
|
|
61
|
+
for (int i = 0; i < numPairs; i++) {
|
|
62
|
+
REQUIRE(pairArray[i] == pairArray2[i]);
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
} /* namespace datasketches */
|
|
@@ -0,0 +1,381 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
#include <cstring>
|
|
22
|
+
#include <sstream>
|
|
23
|
+
#include <fstream>
|
|
24
|
+
|
|
25
|
+
#include <catch.hpp>
|
|
26
|
+
|
|
27
|
+
#include "cpc_sketch.hpp"
|
|
28
|
+
|
|
29
|
+
namespace datasketches {
|
|
30
|
+
|
|
31
|
+
// Relative tolerance applied to estimates of the lg_k = 11 sketches in the tests below.
static constexpr double RELATIVE_ERROR_FOR_LG_K_11 = 0.02;
|
|
32
|
+
|
|
33
|
+
TEST_CASE("cpc sketch: lg k limits", "[cpc_sketch]") {
  // Both ends of the [CPC_MIN_LG_K, CPC_MAX_LG_K] range must construct without throwing.
  cpc_sketch smallest(CPC_MIN_LG_K);
  cpc_sketch largest(CPC_MAX_LG_K);

  // One step outside the range on either side must raise std::invalid_argument.
  REQUIRE_THROWS_AS(cpc_sketch(CPC_MIN_LG_K - 1), std::invalid_argument);
  REQUIRE_THROWS_AS(cpc_sketch(CPC_MAX_LG_K + 1), std::invalid_argument);
}
|
|
39
|
+
|
|
40
|
+
TEST_CASE("cpc sketch: empty", "[cpc_sketch]") {
  // A sketch that has never been updated.
  cpc_sketch fresh(11);

  REQUIRE(fresh.is_empty());

  // The estimate and both bounds (requested with argument 1) must be exactly zero.
  REQUIRE(fresh.get_estimate() == 0.0);
  REQUIRE(fresh.get_lower_bound(1) == 0.0);
  REQUIRE(fresh.get_upper_bound(1) == 0.0);

  REQUIRE(fresh.validate());
}
|
|
48
|
+
|
|
49
|
+
TEST_CASE("cpc sketch: one value", "[cpc_sketch]") {
  // Feed exactly one item.
  cpc_sketch single(11);
  single.update(1);

  REQUIRE_FALSE(single.is_empty());

  // The estimate must be 1 within an absolute margin of RELATIVE_ERROR_FOR_LG_K_11 ...
  REQUIRE(single.get_estimate() == Approx(1).margin(RELATIVE_ERROR_FOR_LG_K_11));
  // ... and must lie between the lower and upper bounds requested with argument 1.
  REQUIRE(single.get_estimate() >= single.get_lower_bound(1));
  REQUIRE(single.get_estimate() <= single.get_upper_bound(1));

  REQUIRE(single.validate());
}
|
|
58
|
+
|
|
59
|
+
TEST_CASE("cpc sketch: many values", "[cpc_sketch]") {
  const int n = 10000;

  // Feed the distinct integers 0 .. n-1.
  cpc_sketch populated(11);
  for (int item = 0; item < n; item++) {
    populated.update(item);
  }

  REQUIRE_FALSE(populated.is_empty());

  // The estimate must be within n * RELATIVE_ERROR_FOR_LG_K_11 of the true count ...
  REQUIRE(populated.get_estimate() == Approx(n).margin(n * RELATIVE_ERROR_FOR_LG_K_11));
  // ... and bracketed by the bounds requested with argument 1.
  REQUIRE(populated.get_estimate() >= populated.get_lower_bound(1));
  REQUIRE(populated.get_estimate() <= populated.get_upper_bound(1));

  REQUIRE(populated.validate());
}
|
|
69
|
+
|
|
70
|
+
TEST_CASE("cpc sketch: overflow bug", "[cpc_sketch]") {
  const int n = 100000000;

  // Feed n consecutive 64-bit keys starting from a fixed value.
  cpc_sketch stressed(12);
  uint64_t key = 15200000000; // problem happened with this sequence
  for (int fed = 0; fed < n; fed++) {
    stressed.update(key);
    key++;
  }

  REQUIRE_FALSE(stressed.is_empty());

  // A 10% margin is accepted here.
  REQUIRE(stressed.get_estimate() == Approx(n).margin(n * 0.1));
  REQUIRE(stressed.get_estimate() >= stressed.get_lower_bound(1));
  REQUIRE(stressed.get_estimate() <= stressed.get_upper_bound(1));

  REQUIRE(stressed.validate());
}
|
|
81
|
+
|
|
82
|
+
TEST_CASE("cpc sketch: serialize deserialize empty", "[cpc_sketch]") {
  // Round-trip a never-updated sketch through an in-memory binary stream.
  cpc_sketch sketch(11);
  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
  sketch.serialize(s);
  cpc_sketch deserialized = cpc_sketch::deserialize(s);
  REQUIRE(deserialized.is_empty() == sketch.is_empty());
  REQUIRE(deserialized.get_estimate() == sketch.get_estimate());
  REQUIRE(deserialized.validate());

  // Also write the serialized image to a file in the working directory.
  // The stream is opened in binary mode so the bytes are written without
  // any text-mode newline translation.
  std::ofstream os("cpc-empty.bin", std::ios::binary);
  sketch.serialize(os);
}
|
|
94
|
+
|
|
95
|
+
TEST_CASE("cpc sketch: serialize deserialize sparse", "[cpc_sketch]") {
  // 100 distinct updates; the test title labels this the "sparse" case.
  cpc_sketch sketch(11);
  const int n(100);
  for (int i = 0; i < n; i++) sketch.update(i);

  // Round-trip through an in-memory binary stream.
  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
  sketch.serialize(s);
  cpc_sketch deserialized = cpc_sketch::deserialize(s);
  REQUIRE(deserialized.is_empty() == sketch.is_empty());
  REQUIRE(deserialized.get_estimate() == sketch.get_estimate());
  REQUIRE(deserialized.validate());

  // updating again with the same values should not change the sketch
  for (int i = 0; i < n; i++) deserialized.update(i);
  REQUIRE(deserialized.get_estimate() == sketch.get_estimate());
  REQUIRE(deserialized.validate());

  // Also write the serialized image to a file in the working directory.
  // The stream is opened in binary mode so the bytes are written without
  // any text-mode newline translation.
  std::ofstream os("cpc-sparse.bin", std::ios::binary);
  sketch.serialize(os);
}
|
|
114
|
+
|
|
115
|
+
TEST_CASE("cpc sketch: serialize deserialize hybrid", "[cpc_sketch]") {
  // 200 distinct updates; the test title labels this the "hybrid" case.
  cpc_sketch sketch(11);
  const int n(200);
  for (int i = 0; i < n; i++) sketch.update(i);

  // Round-trip through an in-memory binary stream.
  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
  sketch.serialize(s);
  cpc_sketch deserialized = cpc_sketch::deserialize(s);
  REQUIRE(deserialized.is_empty() == sketch.is_empty());
  REQUIRE(deserialized.get_estimate() == sketch.get_estimate());
  REQUIRE(deserialized.validate());

  // updating again with the same values should not change the sketch
  for (int i = 0; i < n; i++) deserialized.update(i);
  REQUIRE(deserialized.get_estimate() == sketch.get_estimate());
  REQUIRE(deserialized.validate());

  // Also write the serialized image to a file in the working directory.
  // The stream is opened in binary mode so the bytes are written without
  // any text-mode newline translation.
  std::ofstream os("cpc-hybrid.bin", std::ios::binary);
  sketch.serialize(os);
}
|
|
134
|
+
|
|
135
|
+
TEST_CASE("cpc sketch: serialize deserialize pinned", "[cpc_sketch]") {
  // 2000 distinct updates; the test title labels this the "pinned" case.
  cpc_sketch sketch(11);
  const int n(2000);
  for (int i = 0; i < n; i++) sketch.update(i);

  // Round-trip through an in-memory binary stream.
  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
  sketch.serialize(s);
  cpc_sketch deserialized = cpc_sketch::deserialize(s);
  REQUIRE(deserialized.is_empty() == sketch.is_empty());
  REQUIRE(deserialized.get_estimate() == sketch.get_estimate());
  REQUIRE(deserialized.validate());

  // updating again with the same values should not change the sketch
  for (int i = 0; i < n; i++) deserialized.update(i);
  REQUIRE(deserialized.get_estimate() == sketch.get_estimate());
  REQUIRE(deserialized.validate());

  // Also write the serialized image to a file in the working directory.
  // The stream is opened in binary mode so the bytes are written without
  // any text-mode newline translation.
  std::ofstream os("cpc-pinned.bin", std::ios::binary);
  sketch.serialize(os);
}
|
|
154
|
+
|
|
155
|
+
TEST_CASE("cpc sketch: serialize deserialize sliding", "[cpc_sketch]") {
  // 20000 distinct updates; the test title labels this the "sliding" case.
  cpc_sketch sketch(11);
  const int n(20000);
  for (int i = 0; i < n; i++) sketch.update(i);

  // Round-trip through an in-memory binary stream.
  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
  sketch.serialize(s);
  cpc_sketch deserialized = cpc_sketch::deserialize(s);
  REQUIRE(deserialized.is_empty() == sketch.is_empty());
  REQUIRE(deserialized.get_estimate() == sketch.get_estimate());
  REQUIRE(deserialized.validate());

  // updating again with the same values should not change the sketch
  for (int i = 0; i < n; i++) deserialized.update(i);
  REQUIRE(deserialized.get_estimate() == sketch.get_estimate());
  REQUIRE(deserialized.validate());

  // Also write the serialized image to a file in the working directory.
  // The stream is opened in binary mode so the bytes are written without
  // any text-mode newline translation.
  std::ofstream os("cpc-sliding.bin", std::ios::binary);
  sketch.serialize(os);
}
|
|
174
|
+
|
|
175
|
+
TEST_CASE("cpc sketch: serializing deserialize sliding large", "[cpc_sketch]") {
  // 3000000 distinct updates; the test title labels this the large "sliding" case.
  cpc_sketch sketch(11);
  const int n(3000000);
  for (int i = 0; i < n; i++) sketch.update(i);

  // Round-trip through an in-memory binary stream.
  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
  sketch.serialize(s);
  cpc_sketch deserialized = cpc_sketch::deserialize(s);
  REQUIRE(deserialized.is_empty() == sketch.is_empty());
  REQUIRE(deserialized.get_estimate() == sketch.get_estimate());
  REQUIRE(deserialized.validate());

  // updating again with the same values should not change the sketch
  for (int i = 0; i < n; i++) deserialized.update(i);
  REQUIRE(deserialized.get_estimate() == sketch.get_estimate());
  REQUIRE(deserialized.validate());

  // Also write the serialized image to a file in the working directory.
  // The stream is opened in binary mode so the bytes are written without
  // any text-mode newline translation.
  std::ofstream os("cpc-sliding-large.bin", std::ios::binary);
  sketch.serialize(os);
}
|
|
194
|
+
|
|
195
|
+
TEST_CASE("cpc sketch: serialize deserialize empty, bytes", "[cpc_sketch]") {
  // Round-trip a never-updated sketch through the byte-buffer API.
  cpc_sketch sketch(11);
  auto bytes = sketch.serialize();
  cpc_sketch deserialized = cpc_sketch::deserialize(bytes.data(), bytes.size());
  REQUIRE(deserialized.is_empty() == sketch.is_empty());
  REQUIRE(deserialized.get_estimate() == sketch.get_estimate());
  REQUIRE(deserialized.validate());

  // A buffer one byte short of the full image must be rejected.
  REQUIRE_THROWS_AS(cpc_sketch::deserialize(bytes.data(), bytes.size() - 1), std::out_of_range);

  // Also write the serialized image to a file in the working directory.
  // The stream is opened in binary mode so the bytes are written without
  // any text-mode newline translation.
  std::ofstream os("cpc-empty.bin", std::ios::binary);
  sketch.serialize(os);
}
|
|
207
|
+
|
|
208
|
+
TEST_CASE("cpc sketch: serialize deserialize sparse, bytes", "[cpc_sketch]") {
  // Byte-array round trip of a sketch that has seen 100 distinct ints.
  const int n = 100;
  cpc_sketch sketch(11);
  for (int value = 0; value < n; ++value) {
    sketch.update(value);
  }

  auto bytes = sketch.serialize();
  auto deserialized = cpc_sketch::deserialize(bytes.data(), bytes.size());
  REQUIRE(deserialized.is_empty() == sketch.is_empty());
  REQUIRE(deserialized.get_estimate() == sketch.get_estimate());
  REQUIRE(deserialized.validate());

  // every truncated view of the buffer has to be rejected
  const size_t truncated_sizes[] = {7, 15, bytes.size() - 1};
  for (const size_t size : truncated_sizes) {
    REQUIRE_THROWS_AS(cpc_sketch::deserialize(bytes.data(), size), std::out_of_range);
  }

  // feeding the same values once more must leave the restored sketch unchanged
  for (int value = 0; value < n; ++value) {
    deserialized.update(value);
  }
  REQUIRE(deserialized.get_estimate() == sketch.get_estimate());
  REQUIRE(deserialized.validate());
}
|
|
226
|
+
|
|
227
|
+
TEST_CASE("cpc sketch: serialize deserialize hybrid, bytes", "[cpc_sketch]") {
  // Byte-array round trip of a sketch that has seen 200 distinct ints.
  cpc_sketch source(11);
  const int n = 200;
  int next = 0;
  while (next < n) {
    source.update(next);
    ++next;
  }

  auto image = source.serialize();
  cpc_sketch restored = cpc_sketch::deserialize(image.data(), image.size());
  REQUIRE(restored.is_empty() == source.is_empty());
  REQUIRE(restored.get_estimate() == source.get_estimate());
  REQUIRE(restored.validate());

  // buffers shorter than the full image are expected to throw
  REQUIRE_THROWS_AS(cpc_sketch::deserialize(image.data(), 7), std::out_of_range);
  REQUIRE_THROWS_AS(cpc_sketch::deserialize(image.data(), 15), std::out_of_range);
  REQUIRE_THROWS_AS(cpc_sketch::deserialize(image.data(), image.size() - 1), std::out_of_range);

  // replaying the identical input must not move the estimate
  next = 0;
  while (next < n) {
    restored.update(next);
    ++next;
  }
  REQUIRE(restored.get_estimate() == source.get_estimate());
  REQUIRE(restored.validate());
}
|
|
245
|
+
|
|
246
|
+
TEST_CASE("cpc sketch: serialize deserialize pinned, bytes", "[cpc_sketch]") {
  // Byte-array round trip of a sketch that has seen 2000 distinct ints.
  const int n = 2000;
  cpc_sketch sketch(11);
  for (int key = 0; key != n; ++key) {
    sketch.update(key);
  }

  auto bytes = sketch.serialize();
  auto deserialized = cpc_sketch::deserialize(bytes.data(), bytes.size());
  REQUIRE(deserialized.is_empty() == sketch.is_empty());
  REQUIRE(deserialized.get_estimate() == sketch.get_estimate());
  REQUIRE(deserialized.validate());

  // incomplete buffers must be refused with std::out_of_range
  const size_t too_short[] = {7, 15, bytes.size() - 1};
  for (const size_t length : too_short) {
    REQUIRE_THROWS_AS(cpc_sketch::deserialize(bytes.data(), length), std::out_of_range);
  }

  // the same keys applied a second time must be a no-op for the estimate
  for (int key = 0; key != n; ++key) {
    deserialized.update(key);
  }
  REQUIRE(deserialized.get_estimate() == sketch.get_estimate());
  REQUIRE(deserialized.validate());

  // dump the textual summary of the original sketch to stdout
  std::cout << sketch.to_string();
}
|
|
266
|
+
|
|
267
|
+
TEST_CASE("cpc sketch: serialize deserialize sliding, bytes", "[cpc_sketch]") {
  // Byte-array round trip of a sketch that has seen 20000 distinct ints.
  const int n = 20000;
  cpc_sketch original(11);
  for (int i = 0; i < n; ++i) {
    original.update(i);
  }

  auto serialized = original.serialize();
  const auto full_size = serialized.size();
  cpc_sketch copy = cpc_sketch::deserialize(serialized.data(), full_size);
  REQUIRE(copy.is_empty() == original.is_empty());
  REQUIRE(copy.get_estimate() == original.get_estimate());
  REQUIRE(copy.validate());

  // any length below the full image size must raise std::out_of_range
  REQUIRE_THROWS_AS(cpc_sketch::deserialize(serialized.data(), 7), std::out_of_range);
  REQUIRE_THROWS_AS(cpc_sketch::deserialize(serialized.data(), 15), std::out_of_range);
  REQUIRE_THROWS_AS(cpc_sketch::deserialize(serialized.data(), full_size - 1), std::out_of_range);

  // re-applying the already seen values must leave the copy as it was
  for (int i = 0; i < n; ++i) {
    copy.update(i);
  }
  REQUIRE(copy.get_estimate() == original.get_estimate());
  REQUIRE(copy.validate());
}
|
|
285
|
+
|
|
286
|
+
TEST_CASE("cpc sketch: copy", "[cpc_sketch]") {
  // Exercises copy construction and copy assignment of a sketch.
  cpc_sketch first(11);
  first.update(1);

  // copy construction: the copy must already contain the single item
  cpc_sketch second(first);
  REQUIRE_FALSE(second.is_empty());
  REQUIRE(second.get_estimate() == Approx(1).margin(RELATIVE_ERROR_FOR_LG_K_11));

  // copy assignment: after assigning, the target reflects both items
  second.update(2);
  first = second;
  REQUIRE(first.get_estimate() == Approx(2).margin(RELATIVE_ERROR_FOR_LG_K_11));
}
|
|
296
|
+
|
|
297
|
+
TEST_CASE("cpc sketch: serialize deserialize empty, custom seed", "[cpc_sketch]") {
  // Stream round trip of an empty sketch that was built with seed 123.
  const uint64_t seed = 123;
  cpc_sketch sketch(11, seed);

  std::stringstream stream(std::ios::in | std::ios::out | std::ios::binary);
  sketch.serialize(stream);

  auto deserialized = cpc_sketch::deserialize(stream, seed);
  REQUIRE(deserialized.is_empty() == sketch.is_empty());
  REQUIRE(deserialized.get_estimate() == sketch.get_estimate());
  REQUIRE(deserialized.validate());

  // Reading the same image without passing the seed must be rejected.
  // Rewind first so the second read starts at the beginning of the stream.
  stream.seekg(0);
  REQUIRE_THROWS_AS(cpc_sketch::deserialize(stream), std::invalid_argument);
}
|
|
310
|
+
|
|
311
|
+
TEST_CASE("cpc sketch: kapp range", "[cpc_sketch]") {
  // Bounds of an empty sketch are 0.0 for kappa 1..3; kappa 4 is rejected.
  cpc_sketch s(11);

  for (unsigned kappa = 1; kappa <= 3; ++kappa) {
    REQUIRE(s.get_lower_bound(kappa) == 0.0);
    REQUIRE(s.get_upper_bound(kappa) == 0.0);
  }

  const unsigned invalid_kappa = 4;
  REQUIRE_THROWS_AS(s.get_lower_bound(invalid_kappa), std::invalid_argument);
  REQUIRE_THROWS_AS(s.get_upper_bound(invalid_kappa), std::invalid_argument);
}
|
|
322
|
+
|
|
323
|
+
TEST_CASE("cpc sketch: validate fail", "[cpc_sketch]") {
  // A sketch restored from a deliberately damaged image must fail validate().
  const int n = 2000;
  cpc_sketch sketch(11);
  for (int i = 0; i < n; ++i) {
    sketch.update(i);
  }

  std::stringstream stream(std::ios::in | std::ios::out | std::ios::binary);
  sketch.serialize(stream);

  // the stream should be 856 bytes long. corrupt it somewhere before the end
  const int corruption_offset = 700;
  stream.seekp(corruption_offset);
  stream << "corrupt data";

  auto deserialized = cpc_sketch::deserialize(stream);
  REQUIRE_FALSE(deserialized.validate());
}
|
|
334
|
+
|
|
335
|
+
TEST_CASE("cpc sketch: serialize both ways", "[cpc_sketch]") {
  // The stream serializer and the byte-array serializer must produce the same
  // image, apart from the header bytes reserved at the front of the byte array.
  cpc_sketch sketch(11);
  const int n(2000);
  for (int i = 0; i < n; i++) sketch.update(i);
  const int header_size_bytes = 4;
  auto bytes = sketch.serialize(header_size_bytes);
  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
  sketch.serialize(s);
  const size_t stream_size = static_cast<size_t>(s.tellp());
  REQUIRE(stream_size == bytes.size() - header_size_bytes);

  // Owning buffer instead of new[]/delete[]: a failing REQUIRE aborts the test
  // case, which would otherwise skip the delete[] and leak the allocation.
  std::string stream_bytes(stream_size, '\0');
  s.read(&stream_bytes[0], static_cast<std::streamsize>(stream_size));
  // make sure the whole image was read before comparing it
  REQUIRE(static_cast<size_t>(s.gcount()) == stream_size);
  REQUIRE(std::memcmp(stream_bytes.data(), bytes.data() + header_size_bytes, stream_size) == 0);
}
|
|
350
|
+
|
|
351
|
+
TEST_CASE("cpc sketch: update int equivalence", "[cpc_sketch]") {
  // The value -1 converted to each integer width is fed to the same sketch;
  // the estimate must still be about 1, i.e. all of them count as one item.
  cpc_sketch sketch(11);
  sketch.update(static_cast<uint64_t>(-1));
  sketch.update(static_cast<int64_t>(-1));
  sketch.update(static_cast<uint32_t>(-1));
  sketch.update(static_cast<int32_t>(-1));
  sketch.update(static_cast<uint16_t>(-1));
  sketch.update(static_cast<int16_t>(-1));
  sketch.update(static_cast<uint8_t>(-1));
  sketch.update(static_cast<int8_t>(-1));
  REQUIRE(sketch.get_estimate() == Approx(1).margin(RELATIVE_ERROR_FOR_LG_K_11));

  // to compare with Java: the image is raw binary, so open the file in binary
  // mode to keep text-mode newline translation from altering the bytes
  std::ofstream os("cpc-negative-one.bin", std::ios::binary);
  sketch.serialize(os);
}
|
|
365
|
+
|
|
366
|
+
TEST_CASE("cpc sketch: update float equivalence", "[cpc_sketch]") {
  // The value 1 as float and as double must be counted as a single item.
  cpc_sketch sketch(11);
  const float as_float = 1.0f;
  const double as_double = 1.0;
  sketch.update(as_float);
  sketch.update(as_double);
  REQUIRE(sketch.get_estimate() == Approx(1).margin(RELATIVE_ERROR_FOR_LG_K_11));
}
|
|
372
|
+
|
|
373
|
+
TEST_CASE("cpc sketch: update string equivalence", "[cpc_sketch]") {
  // The same text passed as std::string and as (pointer, length) must be
  // counted as a single item.
  const std::string text = "a";
  cpc_sketch sketch(11);
  sketch.update(text);
  sketch.update(text.c_str(), text.length());
  REQUIRE(sketch.get_estimate() == Approx(1).margin(RELATIVE_ERROR_FOR_LG_K_11));
}
|
|
380
|
+
|
|
381
|
+
} /* namespace datasketches */
|