datasketches 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE +310 -0
- data/NOTICE +11 -0
- data/README.md +126 -0
- data/ext/datasketches/cpc_wrapper.cpp +50 -0
- data/ext/datasketches/ext.cpp +12 -0
- data/ext/datasketches/extconf.rb +11 -0
- data/ext/datasketches/hll_wrapper.cpp +69 -0
- data/lib/datasketches.rb +9 -0
- data/lib/datasketches/version.rb +3 -0
- data/vendor/datasketches-cpp/CMakeLists.txt +126 -0
- data/vendor/datasketches-cpp/LICENSE +311 -0
- data/vendor/datasketches-cpp/MANIFEST.in +19 -0
- data/vendor/datasketches-cpp/NOTICE +11 -0
- data/vendor/datasketches-cpp/README.md +42 -0
- data/vendor/datasketches-cpp/common/CMakeLists.txt +45 -0
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +173 -0
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +458 -0
- data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +291 -0
- data/vendor/datasketches-cpp/common/include/ceiling_power_of_2.hpp +41 -0
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +51 -0
- data/vendor/datasketches-cpp/common/include/conditional_back_inserter.hpp +68 -0
- data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +70 -0
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +114 -0
- data/vendor/datasketches-cpp/common/include/inv_pow2_table.hpp +107 -0
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +57 -0
- data/vendor/datasketches-cpp/common/include/serde.hpp +196 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +38 -0
- data/vendor/datasketches-cpp/common/test/catch.hpp +17618 -0
- data/vendor/datasketches-cpp/common/test/catch_runner.cpp +7 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.cpp +31 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +108 -0
- data/vendor/datasketches-cpp/common/test/test_runner.cpp +29 -0
- data/vendor/datasketches-cpp/common/test/test_type.hpp +137 -0
- data/vendor/datasketches-cpp/cpc/CMakeLists.txt +74 -0
- data/vendor/datasketches-cpp/cpc/include/compression_data.hpp +6022 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +62 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +147 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +742 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +167 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +311 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +810 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +102 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +346 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +137 -0
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +274 -0
- data/vendor/datasketches-cpp/cpc/include/kxp_byte_lookup.hpp +81 -0
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +84 -0
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +266 -0
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +67 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +381 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +149 -0
- data/vendor/datasketches-cpp/fi/CMakeLists.txt +54 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +319 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +484 -0
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +114 -0
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +345 -0
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +84 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +360 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +47 -0
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +92 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +303 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +83 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +811 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +40 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +291 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +59 -0
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +417 -0
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +91 -0
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +233 -0
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +43 -0
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +90 -0
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +48 -0
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +335 -0
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +69 -0
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +124 -0
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +55 -0
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +158 -0
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +56 -0
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +706 -0
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +136 -0
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +462 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +149 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +85 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +170 -0
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +287 -0
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +239 -0
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables-internal.hpp +112 -0
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +46 -0
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +56 -0
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +43 -0
- data/vendor/datasketches-cpp/hll/include/hll.hpp +669 -0
- data/vendor/datasketches-cpp/hll/include/hll.private.hpp +32 -0
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +79 -0
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +51 -0
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +130 -0
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +181 -0
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +93 -0
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +191 -0
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +389 -0
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +313 -0
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +141 -0
- data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +44 -0
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +168 -0
- data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/CMakeLists.txt +58 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +150 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +319 -0
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +67 -0
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +169 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +559 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +1131 -0
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +154 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +685 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +229 -0
- data/vendor/datasketches-cpp/pyproject.toml +17 -0
- data/vendor/datasketches-cpp/python/CMakeLists.txt +61 -0
- data/vendor/datasketches-cpp/python/README.md +78 -0
- data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +345 -0
- data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +354 -0
- data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +346 -0
- data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +463 -0
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +396 -0
- data/vendor/datasketches-cpp/python/src/__init__.py +2 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +90 -0
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +40 -0
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +123 -0
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +136 -0
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +209 -0
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +162 -0
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +488 -0
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +140 -0
- data/vendor/datasketches-cpp/python/tests/__init__.py +0 -0
- data/vendor/datasketches-cpp/python/tests/cpc_test.py +64 -0
- data/vendor/datasketches-cpp/python/tests/fi_test.py +110 -0
- data/vendor/datasketches-cpp/python/tests/hll_test.py +131 -0
- data/vendor/datasketches-cpp/python/tests/kll_test.py +119 -0
- data/vendor/datasketches-cpp/python/tests/theta_test.py +121 -0
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +148 -0
- data/vendor/datasketches-cpp/python/tests/vo_test.py +101 -0
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +48 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +392 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +1752 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +239 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +645 -0
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +43 -0
- data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +67 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +509 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +358 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
- data/vendor/datasketches-cpp/setup.py +94 -0
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +57 -0
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +73 -0
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +83 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +88 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +130 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +533 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +939 -0
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +122 -0
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +109 -0
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +45 -0
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +244 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +218 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +438 -0
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +97 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +104 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp +52 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp +32 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +52 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection_impl.hpp +31 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +179 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +238 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +81 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_sampled_sets.hpp +135 -0
- data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_theta_sketched_sets.hpp +135 -0
- data/vendor/datasketches-cpp/tuple/include/jaccard_similarity.hpp +172 -0
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +53 -0
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental_impl.hpp +33 -0
- data/vendor/datasketches-cpp/tuple/include/theta_comparators.hpp +48 -0
- data/vendor/datasketches-cpp/tuple/include/theta_constants.hpp +34 -0
- data/vendor/datasketches-cpp/tuple/include/theta_helpers.hpp +54 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_base.hpp +59 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_base_impl.hpp +121 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +78 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base.hpp +54 -0
- data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base_impl.hpp +80 -0
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +393 -0
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +481 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_base.hpp +60 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_base_impl.hpp +84 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +88 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +47 -0
- data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base.hpp +259 -0
- data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base_impl.hpp +389 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +57 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b_impl.hpp +33 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +104 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +496 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +587 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +109 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +47 -0
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +53 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +1 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +1 -0
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +298 -0
- data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +250 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +224 -0
- data/vendor/datasketches-cpp/tuple/test/theta_jaccard_similarity_test.cpp +144 -0
- data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +247 -0
- data/vendor/datasketches-cpp/tuple/test/theta_union_experimental_test.cpp +44 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +289 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +235 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +98 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +102 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +249 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +187 -0
- metadata +302 -0
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
|
3
|
+
# distributed with this work for additional information
|
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
|
6
|
+
# "License"); you may not use this file except in compliance
|
|
7
|
+
# with the License. You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
|
12
|
+
# software distributed under the License is distributed on an
|
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
+
# KIND, either express or implied. See the License for the
|
|
15
|
+
# specific language governing permissions and limitations
|
|
16
|
+
# under the License.
|
|
17
|
+
|
|
18
|
+
# Test executable for the frequent items (fi) module.
add_executable(fi_test
  reverse_purge_hash_map_test.cpp
  frequent_items_sketch_test.cpp
  frequent_items_sketch_custom_type_test.cpp
)

target_link_libraries(fi_test fi common_test)

# Require C++11 for the test sources.
set_target_properties(fi_test PROPERTIES CXX_STANDARD 11 CXX_STANDARD_REQUIRED YES)

# Hand the directory of this CMakeLists.txt (with a trailing slash) to the
# test code as the TEST_BINARY_INPUT_PATH string macro.
file(TO_CMAKE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" FI_TEST_BINARY_PATH)
set(FI_TEST_BINARY_PATH "${FI_TEST_BINARY_PATH}/")
target_compile_definitions(fi_test PRIVATE TEST_BINARY_INPUT_PATH="${FI_TEST_BINARY_PATH}")

# Register the executable with CTest.
add_test(NAME fi_test COMMAND fi_test)
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#include <catch.hpp>
|
|
21
|
+
#include <sstream>
|
|
22
|
+
|
|
23
|
+
#include "frequent_items_sketch.hpp"
|
|
24
|
+
#include "test_type.hpp"
|
|
25
|
+
|
|
26
|
+
namespace datasketches {
|
|
27
|
+
|
|
28
|
+
// Frequent items sketch over test_type items with float weights, wired to the
// test_type hash, equality and serde helpers.
using frequent_test_type_sketch =
    frequent_items_sketch<test_type, float, test_type_hash, test_type_equal, test_type_serde>;
|
|
29
|
+
|
|
30
|
+
// Drives the sketch with a user-defined item type: updates, queries, and a
// serialize/deserialize round trip through a binary string stream.
TEST_CASE("frequent items: custom type", "[frequent_items_sketch]") {
  frequent_test_type_sketch fi(3);

  // Item 1 carries weight 10 and should survive the purge; items 2..7 are
  // passed as temporaries with the default weight.
  fi.update(1, 10);
  for (int value = 2; value <= 7; ++value) {
    fi.update(value);
  }
  // Item 8 is passed as a named object rather than a temporary.
  test_type a8(8);
  fi.update(a8);

  REQUIRE_FALSE(fi.is_empty());
  REQUIRE(fi.get_total_weight() == 17);
  REQUIRE(fi.get_estimate(1) == 10);

  // only 1 item should be above threshold
  auto frequent = fi.get_frequent_items(frequent_items_error_type::NO_FALSE_POSITIVES);
  REQUIRE(frequent.size() == 1);
  REQUIRE(frequent[0].get_item().get_value() == 1);
  REQUIRE(frequent[0].get_estimate() == 10);

  // Round trip: the restored sketch must agree with the source sketch.
  std::stringstream buffer(std::ios::in | std::ios::out | std::ios::binary);
  fi.serialize(buffer);
  auto restored = frequent_test_type_sketch::deserialize(buffer);
  REQUIRE_FALSE(restored.is_empty());
  REQUIRE(restored.get_total_weight() == 17);
  REQUIRE(restored.get_estimate(1) == 10);
  REQUIRE(fi.get_num_active_items() == restored.get_num_active_items());
  REQUIRE(fi.get_maximum_error() == restored.get_maximum_error());

  std::cout << restored.to_string(true);
}
|
|
66
|
+
|
|
67
|
+
// this is to see the debug print from test_type if enabled there to make sure items are moved
|
|
68
|
+
TEST_CASE("frequent items: moving merge", "[frequent_items_sketch]") {
  // Source sketch: merged in as an rvalue below.
  frequent_test_type_sketch source(3);
  source.update(1);

  // Destination sketch: receives the merge.
  frequent_test_type_sketch target(3);
  target.update(2);

  target.merge(std::move(source));

  // One unit of weight from each sketch.
  REQUIRE(target.get_total_weight() == 2);
}
|
|
78
|
+
|
|
79
|
+
// An update with a negative weight must be rejected with std::invalid_argument.
TEST_CASE("frequent items: negative weight", "[frequent_items_sketch]") {
  frequent_test_type_sketch fi(3);
  REQUIRE_THROWS_AS(fi.update(1, -1), std::invalid_argument);
}
|
|
83
|
+
|
|
84
|
+
} /* namespace datasketches */
|
|
@@ -0,0 +1,360 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#include <catch.hpp>
|
|
21
|
+
#include <sstream>
|
|
22
|
+
#include <fstream>
|
|
23
|
+
|
|
24
|
+
#include "frequent_items_sketch.hpp"
|
|
25
|
+
|
|
26
|
+
// Directory prefix for the binary sketch files read by the tests below.
// Taken from the TEST_BINARY_INPUT_PATH macro when the build defines it;
// otherwise falls back to the relative "test/" directory.
#if defined(TEST_BINARY_INPUT_PATH)
static std::string testBinaryInputPath(TEST_BINARY_INPUT_PATH);
#else
static std::string testBinaryInputPath("test/");
#endif
|
|
31
|
+
|
|
32
|
+
namespace datasketches {
|
|
33
|
+
|
|
34
|
+
// Constructing a sketch with the argument 2 must throw std::invalid_argument.
TEST_CASE("frequent items: invalid k", "[frequent_items_sketch]") {
  using int_sketch = frequent_items_sketch<int>;
  REQUIRE_THROWS_AS(int_sketch(2), std::invalid_argument);
}
|
|
37
|
+
|
|
38
|
+
// A freshly constructed sketch reports empty, with no items and no weight.
TEST_CASE("frequent items: empty", "[frequent_items_sketch]") {
  frequent_items_sketch<int> fi(3);
  REQUIRE(fi.is_empty());
  REQUIRE(fi.get_num_active_items() == 0);
  REQUIRE(fi.get_total_weight() == 0);
}
|
|
44
|
+
|
|
45
|
+
// After a single update the estimate and both bounds for that item equal 1.
TEST_CASE("frequent items: one item", "[frequent_items_sketch]") {
  const char* const key = "a";

  frequent_items_sketch<std::string> fi(3);
  fi.update(key);

  REQUIRE_FALSE(fi.is_empty());
  REQUIRE(fi.get_num_active_items() == 1);
  REQUIRE(fi.get_total_weight() == 1);
  REQUIRE(fi.get_estimate(key) == 1);
  REQUIRE(fi.get_lower_bound(key) == 1);
  REQUIRE(fi.get_upper_bound(key) == 1);
}
|
|
55
|
+
|
|
56
|
+
// Seven updates over four distinct strings; every estimate is expected to
// equal the number of times the string was fed in.
TEST_CASE("frequent items: several items, no resize, no purge", "[frequent_items_sketch]") {
  frequent_items_sketch<std::string> fi(3);
  for (const char* item : {"a", "b", "c", "d", "b", "c", "b"}) {
    fi.update(item);
  }

  REQUIRE_FALSE(fi.is_empty());
  REQUIRE(fi.get_total_weight() == 7);
  REQUIRE(fi.get_num_active_items() == 4);
  REQUIRE(fi.get_estimate("a") == 1);
  REQUIRE(fi.get_estimate("b") == 3);
  REQUIRE(fi.get_estimate("c") == 2);
  REQUIRE(fi.get_estimate("d") == 1);
}
|
|
73
|
+
|
|
74
|
+
// Fifteen updates over twelve distinct strings; all twelve are expected to
// stay active and the checked estimates to match the update counts.
TEST_CASE("frequent items: several items, with resize, no purge", "[frequent_items_sketch]") {
  frequent_items_sketch<std::string> fi(4);
  for (const char* item : {"a", "b", "c", "d", "b", "c", "b", "e",
                           "f", "g", "h", "i", "j", "k", "l"}) {
    fi.update(item);
  }

  REQUIRE_FALSE(fi.is_empty());
  REQUIRE(fi.get_total_weight() == 15);
  REQUIRE(fi.get_num_active_items() == 12);
  REQUIRE(fi.get_estimate("a") == 1);
  REQUIRE(fi.get_estimate("b") == 3);
  REQUIRE(fi.get_estimate("c") == 2);
  REQUIRE(fi.get_estimate("d") == 1);
}
|
|
99
|
+
|
|
100
|
+
// Feeds twelve distinct ints, two of them heavy (1 with weight 10, 7 with
// weight 15), and checks both error types of the frequent items query.
TEST_CASE("frequent items: estimation mode", "[frequent_items_sketch]") {
  frequent_items_sketch<int> fi(3);
  fi.update(1, 10);
  for (int item = 2; item <= 6; ++item) {
    fi.update(item);
  }
  fi.update(7, 15);
  for (int item = 8; item <= 12; ++item) {
    fi.update(item);
  }
  REQUIRE(fi.get_maximum_error() > 0); // estimation mode

  REQUIRE_FALSE(fi.is_empty());
  REQUIRE(fi.get_total_weight() == 35);

  // only 2 items (1 and 7) should have counts more than 1
  auto frequent = fi.get_frequent_items(frequent_items_error_type::NO_FALSE_POSITIVES);
  REQUIRE(frequent.size() == 2);
  REQUIRE(frequent[0].get_item() == 7);
  REQUIRE(frequent[0].get_estimate() == 15);
  REQUIRE(frequent[1].get_item() == 1);
  REQUIRE(frequent[1].get_estimate() == 10);

  // at least 2 items, but not more than 12 items
  frequent = fi.get_frequent_items(frequent_items_error_type::NO_FALSE_NEGATIVES);
  REQUIRE(2 <= frequent.size());
  REQUIRE(12 >= frequent.size());
}
|
|
130
|
+
|
|
131
|
+
// Merges two small sketches and checks that the combined counts are the sums
// of what each sketch saw.
TEST_CASE("frequent items: merge exact mode", "[frequent_items_sketch]") {
  frequent_items_sketch<int> sketch1(3);
  sketch1.update(1);
  sketch1.update(2);
  sketch1.update(3);
  sketch1.update(4);

  // These updates must go to sketch2: feeding them to sketch1 would leave
  // sketch2 empty and the merge below would have nothing to merge.
  frequent_items_sketch<int> sketch2(3);
  sketch2.update(2);
  sketch2.update(3);
  sketch2.update(2);
  REQUIRE_FALSE(sketch2.is_empty());

  sketch1.merge(sketch2);
  REQUIRE_FALSE(sketch1.is_empty());
  REQUIRE(sketch1.get_total_weight() == 7);
  REQUIRE(sketch1.get_num_active_items() == 4);
  REQUIRE(sketch1.get_estimate(1) == 1);
  REQUIRE(sketch1.get_estimate(2) == 3);
  REQUIRE(sketch1.get_estimate(3) == 2);
  REQUIRE(sketch1.get_estimate(4) == 1);
}
|
|
152
|
+
|
|
153
|
+
// Merges two sketches that each report a non-zero maximum error and checks
// that the two heavy items (21 and 1) are the ones reported as frequent.
TEST_CASE("frequent items: merge estimation mode", "[frequent_items_sketch]") {
  frequent_items_sketch<int> target(4);
  target.update(1, 9); // to make sure it survives the purge
  for (int item = 2; item <= 14; ++item) {
    target.update(item);
  }
  REQUIRE(target.get_maximum_error() > 0); // estimation mode

  frequent_items_sketch<int> other(4);
  for (int item = 8; item <= 20; ++item) {
    other.update(item);
  }
  other.update(21, 11); // to make sure it survives the purge
  REQUIRE(other.get_maximum_error() > 0); // estimation mode

  target.merge(other);
  REQUIRE_FALSE(target.is_empty());
  REQUIRE(target.get_total_weight() == 46);
  REQUIRE(2 <= target.get_num_active_items());

  // only 2 items (1 and 21) should be above threshold
  auto frequent = target.get_frequent_items(frequent_items_error_type::NO_FALSE_POSITIVES, 2);
  REQUIRE(frequent.size() == 2);
  REQUIRE(frequent[0].get_item() == 21);
  REQUIRE(11 <= frequent[0].get_estimate()); // always overestimated
  REQUIRE(frequent[1].get_item() == 1);
  REQUIRE(9 <= frequent[1].get_estimate()); // always overestimated
}
|
|
200
|
+
|
|
201
|
+
// Reads longs_sketch_from_java.sk from the test input directory and checks
// that it holds items 1..4 with an estimate of 1 each.
TEST_CASE("frequent items: deserialize from java long", "[frequent_items_sketch]") {
  std::ifstream input;
  // Enable exceptions before opening so a missing file fails loudly.
  input.exceptions(std::ios::failbit | std::ios::badbit);
  input.open(testBinaryInputPath + "longs_sketch_from_java.sk", std::ios::binary);

  auto fi = frequent_items_sketch<long long>::deserialize(input);
  REQUIRE_FALSE(fi.is_empty());
  REQUIRE(fi.get_total_weight() == 4);
  REQUIRE(fi.get_num_active_items() == 4);
  for (long long item = 1; item <= 4; ++item) {
    REQUIRE(fi.get_estimate(item) == 1);
  }
}
|
|
214
|
+
|
|
215
|
+
// Loads the binary image "items_sketch_string_from_java.sk" (judging by its
// name, presumably written by the Java implementation -- TODO confirm) into a
// frequent_items_sketch<std::string> and checks the decoded contents.
TEST_CASE("frequent items: deserialize from java string", "[frequent_items_sketch]") {
  std::ifstream input;
  // Arm the exception mask before opening so a failed open throws.
  input.exceptions(std::ios::failbit | std::ios::badbit);
  const auto file_name = testBinaryInputPath + "items_sketch_string_from_java.sk";
  input.open(file_name, std::ios::binary);

  auto sketch = frequent_items_sketch<std::string>::deserialize(input);
  REQUIRE_FALSE(sketch.is_empty());
  REQUIRE(sketch.get_total_weight() == 4);
  REQUIRE(sketch.get_num_active_items() == 4);

  // Every expected item must report an estimate of exactly 1.
  const char* const expected_items[] = {
    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
    "bbbbbbbbbbbbbbbbbbbbbbbbbbbbb",
    "ccccccccccccccccccccccccccccc",
    "ddddddddddddddddddddddddddddd"
  };
  for (const char* item : expected_items) {
    REQUIRE(sketch.get_estimate(item) == 1);
  }
}
|
|
228
|
+
|
|
229
|
+
// Loads the binary image "items_sketch_string_utf8_from_java.sk" (judging by
// its name, presumably written by the Java implementation -- TODO confirm)
// and checks that non-ASCII string items are decoded with their weights.
TEST_CASE("frequent items: deserialize from java string, utf-8", "[frequent_items_sketch]") {
  std::ifstream input;
  // Arm the exception mask before opening so a failed open throws.
  input.exceptions(std::ios::failbit | std::ios::badbit);
  const auto file_name = testBinaryInputPath + "items_sketch_string_utf8_from_java.sk";
  input.open(file_name, std::ios::binary);

  auto sketch = frequent_items_sketch<std::string>::deserialize(input);
  REQUIRE_FALSE(sketch.is_empty());
  // The expected total of 10 equals the sum of the estimates below: 1 + 2 + 3 + 4.
  REQUIRE(sketch.get_total_weight() == 10);
  REQUIRE(sketch.get_num_active_items() == 4);

  // The item at index i is expected with an estimate of i + 1.
  const char* const expected_items[] = {"абвгд", "еёжзи", "йклмн", "опрст"};
  for (int i = 0; i < 4; ++i) {
    REQUIRE(sketch.get_estimate(expected_items[i]) == i + 1);
  }
}
|
|
242
|
+
|
|
243
|
+
// Round trip through a binary std::stringstream: a long long sketch is
// serialized into the stream and read back, and the copy must report the same
// total weight, active item count and per-item estimates.
TEST_CASE("frequent items: deserialize long64 stream", "[frequent_items_sketch]") {
  frequent_items_sketch<long long> original(3);
  // Item k is inserted with weight k, for k = 1..5 (total weight 15).
  for (int k = 1; k <= 5; ++k) {
    original.update(k, k);
  }

  std::stringstream buffer(std::ios::in | std::ios::out | std::ios::binary);
  original.serialize(buffer);
  auto restored = frequent_items_sketch<long long>::deserialize(buffer);

  REQUIRE_FALSE(restored.is_empty());
  REQUIRE(restored.get_total_weight() == 15);
  REQUIRE(restored.get_num_active_items() == 5);
  for (int k = 1; k <= 5; ++k) {
    REQUIRE(restored.get_estimate(k) == k);
  }
}
|
|
263
|
+
|
|
264
|
+
// Round trip through an in-memory byte buffer: a long long sketch is
// serialized with the no-argument serialize() and rebuilt with
// deserialize(pointer, size). The copy must report the same total weight,
// active item count and per-item estimates.
//
// The test name previously read "serialize deserialiation"; the typo is fixed
// so the name matches the sibling "serialize deserialize ..." test cases.
TEST_CASE("frequent items: serialize deserialize long64 bytes", "[frequent_items_sketch]") {
  frequent_items_sketch<long long> sketch1(3);
  // Item k is inserted with weight k, so the total weight is 1+2+3+4+5 = 15.
  sketch1.update(1, 1);
  sketch1.update(2, 2);
  sketch1.update(3, 3);
  sketch1.update(4, 4);
  sketch1.update(5, 5);

  auto bytes = sketch1.serialize();
  auto sketch2 = frequent_items_sketch<long long>::deserialize(bytes.data(), bytes.size());
  REQUIRE_FALSE(sketch2.is_empty());
  REQUIRE(sketch2.get_total_weight() == 15);
  REQUIRE(sketch2.get_num_active_items() == 5);
  // Each item's estimate must equal the weight it was inserted with.
  REQUIRE(sketch2.get_estimate(1) == 1);
  REQUIRE(sketch2.get_estimate(2) == 2);
  REQUIRE(sketch2.get_estimate(3) == 3);
  REQUIRE(sketch2.get_estimate(4) == 4);
  REQUIRE(sketch2.get_estimate(5) == 5);
}
|
|
283
|
+
|
|
284
|
+
// Round trip through a binary std::stringstream for std::string items: the
// sketch is serialized into the stream and read back, and the copy must
// report the same total weight, active item count and per-item estimates.
TEST_CASE("frequent items: serialize deserialize string stream", "[frequent_items_sketch]") {
  const char* const items[] = {
    "aaaaaaaaaaaaaaaa",
    "bbbbbbbbbbbbbbbb",
    "cccccccccccccccc",
    "dddddddddddddddd",
    "eeeeeeeeeeeeeeee"
  };

  frequent_items_sketch<std::string> original(3);
  // The item at index i is inserted with weight i + 1 (total weight 15).
  for (int i = 0; i < 5; ++i) {
    original.update(items[i], i + 1);
  }

  std::stringstream buffer(std::ios::in | std::ios::out | std::ios::binary);
  original.serialize(buffer);
  auto restored = frequent_items_sketch<std::string>::deserialize(buffer);

  REQUIRE_FALSE(restored.is_empty());
  REQUIRE(restored.get_total_weight() == 15);
  REQUIRE(restored.get_num_active_items() == 5);
  for (int i = 0; i < 5; ++i) {
    REQUIRE(restored.get_estimate(items[i]) == i + 1);
  }
}
|
|
304
|
+
|
|
305
|
+
// Round trip through an in-memory byte buffer for std::string items: the
// sketch is serialized with the no-argument serialize() and rebuilt with
// deserialize(pointer, size); the copy must match the original's contents.
TEST_CASE("frequent items: serialize deserialize string bytes", "[frequent_items_sketch]") {
  const char* const items[] = {
    "aaaaaaaaaaaaaaaa",
    "bbbbbbbbbbbbbbbb",
    "cccccccccccccccc",
    "dddddddddddddddd",
    "eeeeeeeeeeeeeeee"
  };

  frequent_items_sketch<std::string> original(3);
  // The item at index i is inserted with weight i + 1 (total weight 15).
  for (int i = 0; i < 5; ++i) {
    original.update(items[i], i + 1);
  }

  auto image = original.serialize();
  auto restored = frequent_items_sketch<std::string>::deserialize(image.data(), image.size());

  REQUIRE_FALSE(restored.is_empty());
  REQUIRE(restored.get_total_weight() == 15);
  REQUIRE(restored.get_num_active_items() == 5);
  for (int i = 0; i < 5; ++i) {
    REQUIRE(restored.get_estimate(items[i]) == i + 1);
  }
}
|
|
324
|
+
|
|
325
|
+
// Round trip through a binary std::stringstream using non-ASCII (Cyrillic)
// string items: the restored sketch must report the same total weight,
// active item count and per-item estimates as were inserted.
TEST_CASE("frequent items: serialize deserialize string, utf-8 stream", "[frequent_items_sketch]") {
  const char* const items[] = {"абвгд", "еёжзи", "йклмн", "опрст", "уфхцч"};

  frequent_items_sketch<std::string> original(3);
  // The item at index i is inserted with weight i + 1 (total weight 15).
  for (int i = 0; i < 5; ++i) {
    original.update(items[i], i + 1);
  }

  std::stringstream buffer(std::ios::in | std::ios::out | std::ios::binary);
  original.serialize(buffer);
  auto restored = frequent_items_sketch<std::string>::deserialize(buffer);

  REQUIRE_FALSE(restored.is_empty());
  REQUIRE(restored.get_total_weight() == 15);
  REQUIRE(restored.get_num_active_items() == 5);
  for (int i = 0; i < 5; ++i) {
    REQUIRE(restored.get_estimate(items[i]) == i + 1);
  }
}
|
|
345
|
+
|
|
346
|
+
// Serializes a sketch holding a single int64_t item, then asks deserialize()
// to read from a length one byte short of the full image. The call is
// required to throw std::out_of_range.
TEST_CASE("frequent items: int64 deserialize single item buffer overrun", "[frequent_items_sketch]") {
  using sketch_type = frequent_items_sketch<int64_t>;

  sketch_type source(3);
  source.update(1);
  auto image = source.serialize();

  // Report one byte fewer than was actually serialized.
  const auto truncated_size = image.size() - 1;
  REQUIRE_THROWS_AS(sketch_type::deserialize(image.data(), truncated_size), std::out_of_range);
}
|
|
352
|
+
|
|
353
|
+
// Serializes a sketch holding a single std::string item, then asks
// deserialize() to read from a length one byte short of the full image.
// The call is required to throw std::out_of_range.
TEST_CASE("frequent items: string deserialize single item buffer overrun", "[frequent_items_sketch]") {
  using sketch_type = frequent_items_sketch<std::string>;

  sketch_type source(3);
  source.update("a");
  auto image = source.serialize();

  // Report one byte fewer than was actually serialized.
  const auto truncated_size = image.size() - 1;
  REQUIRE_THROWS_AS(sketch_type::deserialize(image.data(), truncated_size), std::out_of_range);
}
|
|
359
|
+
|
|
360
|
+
} /* namespace datasketches */
|