datasketches 0.3.1 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/NOTICE +1 -1
- data/README.md +0 -2
- data/ext/datasketches/cpc_wrapper.cpp +3 -3
- data/ext/datasketches/kll_wrapper.cpp +0 -10
- data/lib/datasketches/version.rb +1 -1
- data/lib/datasketches.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +23 -20
- data/vendor/datasketches-cpp/CODE_OF_CONDUCT.md +3 -0
- data/vendor/datasketches-cpp/CONTRIBUTING.md +50 -0
- data/vendor/datasketches-cpp/Doxyfile +2827 -0
- data/vendor/datasketches-cpp/LICENSE +0 -76
- data/vendor/datasketches-cpp/README.md +1 -3
- data/vendor/datasketches-cpp/common/CMakeLists.txt +12 -11
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +25 -27
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +15 -10
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +11 -2
- data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov.hpp +9 -6
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +5 -4
- data/vendor/datasketches-cpp/common/include/optional.hpp +148 -0
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view.hpp +95 -2
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +1 -1
- data/vendor/datasketches-cpp/common/include/serde.hpp +69 -20
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +2 -2
- data/vendor/datasketches-cpp/common/test/integration_test.cpp +6 -0
- data/vendor/datasketches-cpp/common/test/optional_test.cpp +85 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +14 -14
- data/vendor/datasketches-cpp/{python/src/__init__.py → count/CMakeLists.txt} +25 -1
- data/vendor/datasketches-cpp/count/include/count_min.hpp +405 -0
- data/vendor/datasketches-cpp/count/include/count_min_impl.hpp +497 -0
- data/vendor/datasketches-cpp/{MANIFEST.in → count/test/CMakeLists.txt} +23 -20
- data/vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp +155 -0
- data/vendor/datasketches-cpp/count/test/count_min_test.cpp +303 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +14 -20
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +7 -4
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +17 -17
- data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +3 -3
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +40 -40
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +14 -11
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +35 -11
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +8 -8
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +16 -8
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -2
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +5 -5
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +20 -7
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_deserialize_from_java_test.cpp +60 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_serialize_for_java.cpp +38 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +4 -29
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +4 -4
- data/vendor/datasketches-cpp/{tox.ini → density/CMakeLists.txt} +24 -8
- data/vendor/datasketches-cpp/density/include/density_sketch.hpp +256 -0
- data/vendor/datasketches-cpp/density/include/density_sketch_impl.hpp +543 -0
- data/vendor/datasketches-cpp/{python/datasketches/__init__.py → density/test/CMakeLists.txt} +15 -3
- data/vendor/datasketches-cpp/density/test/density_sketch_test.cpp +244 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +21 -9
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +6 -4
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +9 -3
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_deserialize_from_java_test.cpp +95 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_serialize_for_java.cpp +83 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +3 -42
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +3 -1
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +19 -11
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +2 -5
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +19 -7
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +98 -42
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +2 -0
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +94 -61
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +20 -8
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +4 -4
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +3 -1
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +3 -21
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +8 -0
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +14 -18
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +8 -2
- data/vendor/datasketches-cpp/hll/include/hll.hpp +79 -65
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +7 -1
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +0 -68
- data/vendor/datasketches-cpp/hll/test/hll_sketch_deserialize_from_java_test.cpp +69 -0
- data/vendor/datasketches-cpp/hll/test/hll_sketch_serialize_for_java.cpp +52 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -1
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +2 -2
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +79 -53
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +61 -132
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/kll/test/kll_sketch_deserialize_from_java_test.cpp +103 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_serialize_for_java.cpp +62 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +5 -40
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +76 -54
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +66 -136
- data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_deserialize_from_java_test.cpp +84 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_serialize_for_java.cpp +52 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +15 -39
- data/vendor/datasketches-cpp/req/include/req_common.hpp +7 -3
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +2 -4
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +105 -26
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +50 -111
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/req/test/req_sketch_deserialize_from_java_test.cpp +55 -0
- data/vendor/datasketches-cpp/{tuple/include/array_of_doubles_intersection_impl.hpp → req/test/req_sketch_serialize_for_java.cpp} +12 -7
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +3 -89
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +4 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sample.hpp +210 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sample_impl.hpp +535 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sketch.hpp +281 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sketch_impl.hpp +531 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +89 -32
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +33 -19
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +13 -10
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +23 -19
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +55 -8
- data/vendor/datasketches-cpp/sampling/test/ebpps_allocation_test.cpp +96 -0
- data/vendor/datasketches-cpp/sampling/test/ebpps_sample_test.cpp +137 -0
- data/vendor/datasketches-cpp/sampling/test/ebpps_sketch_test.cpp +266 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_deserialize_from_java_test.cpp +81 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_serialize_for_java.cpp +54 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +33 -51
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_deserialize_from_java_test.cpp +50 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_serialize_for_java.cpp +56 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -20
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/theta/include/bit_packing.hpp +6279 -0
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_theta_sketched_sets.hpp +7 -6
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +14 -8
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +60 -46
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +20 -5
- data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +10 -4
- data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +3 -1
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +13 -5
- data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +5 -5
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +3 -3
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity.hpp +2 -1
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +1 -0
- data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +180 -33
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +430 -130
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +17 -10
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +10 -10
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +3 -3
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +21 -6
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +13 -3
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +15 -1
- data/vendor/datasketches-cpp/theta/test/bit_packing_test.cpp +80 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_deserialize_from_java_test.cpp +57 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_serialize_for_java.cpp +61 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +39 -188
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +25 -0
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +8 -7
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +19 -144
- data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b.hpp → array_tuple_a_not_b.hpp} +24 -16
- data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b_impl.hpp → array_tuple_a_not_b_impl.hpp} +4 -4
- data/vendor/datasketches-cpp/tuple/include/array_tuple_intersection.hpp +65 -0
- data/vendor/datasketches-cpp/tuple/include/array_tuple_intersection_impl.hpp +31 -0
- data/vendor/datasketches-cpp/tuple/include/array_tuple_sketch.hpp +237 -0
- data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_sketch_impl.hpp → array_tuple_sketch_impl.hpp} +40 -41
- data/vendor/datasketches-cpp/tuple/include/array_tuple_union.hpp +81 -0
- data/vendor/datasketches-cpp/tuple/include/array_tuple_union_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +11 -2
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +17 -10
- data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +2 -1
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +95 -32
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +2 -1
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +19 -11
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +16 -1
- data/vendor/datasketches-cpp/tuple/test/aod_sketch_deserialize_from_java_test.cpp +76 -0
- data/vendor/datasketches-cpp/tuple/test/aod_sketch_serialize_for_java.cpp +62 -0
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +5 -129
- data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +85 -89
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +3 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_deserialize_from_java_test.cpp +47 -0
- data/vendor/datasketches-cpp/{python/src/datasketches.cpp → tuple/test/tuple_sketch_serialize_for_java.cpp} +16 -30
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +1 -1
- data/vendor/datasketches-cpp/version.cfg.in +1 -1
- metadata +61 -79
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/pyproject.toml +0 -23
- data/vendor/datasketches-cpp/python/CMakeLists.txt +0 -81
- data/vendor/datasketches-cpp/python/README.md +0 -85
- data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +0 -104
- data/vendor/datasketches-cpp/python/include/py_serde.hpp +0 -113
- data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +0 -345
- data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +0 -354
- data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +0 -346
- data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +0 -463
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +0 -403
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +0 -21
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +0 -90
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +0 -128
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -134
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +0 -210
- data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +0 -68
- data/vendor/datasketches-cpp/python/src/py_serde.cpp +0 -111
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +0 -204
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +0 -215
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +0 -172
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +0 -490
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +0 -173
- data/vendor/datasketches-cpp/python/tests/__init__.py +0 -16
- data/vendor/datasketches-cpp/python/tests/cpc_test.py +0 -64
- data/vendor/datasketches-cpp/python/tests/fi_test.py +0 -110
- data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -130
- data/vendor/datasketches-cpp/python/tests/kll_test.py +0 -125
- data/vendor/datasketches-cpp/python/tests/quantiles_test.py +0 -126
- data/vendor/datasketches-cpp/python/tests/req_test.py +0 -126
- data/vendor/datasketches-cpp/python/tests/theta_test.py +0 -146
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +0 -148
- data/vendor/datasketches-cpp/python/tests/vo_test.py +0 -125
- data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +0 -67
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
- data/vendor/datasketches-cpp/setup.py +0 -110
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +0 -52
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +0 -81
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +0 -43
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +0 -1
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +0 -1
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: datasketches
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-11-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rice
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 4.
|
19
|
+
version: '4.1'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 4.
|
26
|
+
version: '4.1'
|
27
27
|
description:
|
28
28
|
email: andrew@ankane.org
|
29
29
|
executables: []
|
@@ -47,8 +47,10 @@ files:
|
|
47
47
|
- lib/datasketches.rb
|
48
48
|
- lib/datasketches/version.rb
|
49
49
|
- vendor/datasketches-cpp/CMakeLists.txt
|
50
|
+
- vendor/datasketches-cpp/CODE_OF_CONDUCT.md
|
51
|
+
- vendor/datasketches-cpp/CONTRIBUTING.md
|
52
|
+
- vendor/datasketches-cpp/Doxyfile
|
50
53
|
- vendor/datasketches-cpp/LICENSE
|
51
|
-
- vendor/datasketches-cpp/MANIFEST.in
|
52
54
|
- vendor/datasketches-cpp/NOTICE
|
53
55
|
- vendor/datasketches-cpp/README.md
|
54
56
|
- vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in
|
@@ -65,6 +67,7 @@ files:
|
|
65
67
|
- vendor/datasketches-cpp/common/include/kolmogorov_smirnov.hpp
|
66
68
|
- vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp
|
67
69
|
- vendor/datasketches-cpp/common/include/memory_operations.hpp
|
70
|
+
- vendor/datasketches-cpp/common/include/optional.hpp
|
68
71
|
- vendor/datasketches-cpp/common/include/quantiles_sorted_view.hpp
|
69
72
|
- vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp
|
70
73
|
- vendor/datasketches-cpp/common/include/serde.hpp
|
@@ -72,10 +75,17 @@ files:
|
|
72
75
|
- vendor/datasketches-cpp/common/test/CMakeLists.txt
|
73
76
|
- vendor/datasketches-cpp/common/test/catch_runner.cpp
|
74
77
|
- vendor/datasketches-cpp/common/test/integration_test.cpp
|
78
|
+
- vendor/datasketches-cpp/common/test/optional_test.cpp
|
75
79
|
- vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp
|
76
80
|
- vendor/datasketches-cpp/common/test/test_allocator.cpp
|
77
81
|
- vendor/datasketches-cpp/common/test/test_allocator.hpp
|
78
82
|
- vendor/datasketches-cpp/common/test/test_type.hpp
|
83
|
+
- vendor/datasketches-cpp/count/CMakeLists.txt
|
84
|
+
- vendor/datasketches-cpp/count/include/count_min.hpp
|
85
|
+
- vendor/datasketches-cpp/count/include/count_min_impl.hpp
|
86
|
+
- vendor/datasketches-cpp/count/test/CMakeLists.txt
|
87
|
+
- vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp
|
88
|
+
- vendor/datasketches-cpp/count/test/count_min_test.cpp
|
79
89
|
- vendor/datasketches-cpp/cpc/CMakeLists.txt
|
80
90
|
- vendor/datasketches-cpp/cpc/include/compression_data.hpp
|
81
91
|
- vendor/datasketches-cpp/cpc/include/cpc_common.hpp
|
@@ -94,8 +104,15 @@ files:
|
|
94
104
|
- vendor/datasketches-cpp/cpc/test/CMakeLists.txt
|
95
105
|
- vendor/datasketches-cpp/cpc/test/compression_test.cpp
|
96
106
|
- vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp
|
107
|
+
- vendor/datasketches-cpp/cpc/test/cpc_sketch_deserialize_from_java_test.cpp
|
108
|
+
- vendor/datasketches-cpp/cpc/test/cpc_sketch_serialize_for_java.cpp
|
97
109
|
- vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp
|
98
110
|
- vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp
|
111
|
+
- vendor/datasketches-cpp/density/CMakeLists.txt
|
112
|
+
- vendor/datasketches-cpp/density/include/density_sketch.hpp
|
113
|
+
- vendor/datasketches-cpp/density/include/density_sketch_impl.hpp
|
114
|
+
- vendor/datasketches-cpp/density/test/CMakeLists.txt
|
115
|
+
- vendor/datasketches-cpp/density/test/density_sketch_test.cpp
|
99
116
|
- vendor/datasketches-cpp/fi/CMakeLists.txt
|
100
117
|
- vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp
|
101
118
|
- vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp
|
@@ -103,10 +120,9 @@ files:
|
|
103
120
|
- vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp
|
104
121
|
- vendor/datasketches-cpp/fi/test/CMakeLists.txt
|
105
122
|
- vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp
|
123
|
+
- vendor/datasketches-cpp/fi/test/frequent_items_sketch_deserialize_from_java_test.cpp
|
124
|
+
- vendor/datasketches-cpp/fi/test/frequent_items_sketch_serialize_for_java.cpp
|
106
125
|
- vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp
|
107
|
-
- vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk
|
108
|
-
- vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk
|
109
|
-
- vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk
|
110
126
|
- vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp
|
111
127
|
- vendor/datasketches-cpp/hll/CMakeLists.txt
|
112
128
|
- vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp
|
@@ -152,12 +168,8 @@ files:
|
|
152
168
|
- vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp
|
153
169
|
- vendor/datasketches-cpp/hll/test/TablesTest.cpp
|
154
170
|
- vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp
|
155
|
-
- vendor/datasketches-cpp/hll/test/array6_from_java.sk
|
156
|
-
- vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk
|
157
|
-
- vendor/datasketches-cpp/hll/test/compact_set_from_java.sk
|
158
|
-
- vendor/datasketches-cpp/hll/test/list_from_java.sk
|
159
|
-
- vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk
|
160
|
-
- vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk
|
171
|
+
- vendor/datasketches-cpp/hll/test/hll_sketch_deserialize_from_java_test.cpp
|
172
|
+
- vendor/datasketches-cpp/hll/test/hll_sketch_serialize_for_java.cpp
|
161
173
|
- vendor/datasketches-cpp/kll/CMakeLists.txt
|
162
174
|
- vendor/datasketches-cpp/kll/include/kll_helper.hpp
|
163
175
|
- vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp
|
@@ -165,46 +177,12 @@ files:
|
|
165
177
|
- vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp
|
166
178
|
- vendor/datasketches-cpp/kll/test/CMakeLists.txt
|
167
179
|
- vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp
|
180
|
+
- vendor/datasketches-cpp/kll/test/kll_sketch_deserialize_from_java_test.cpp
|
168
181
|
- vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk
|
169
|
-
- vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk
|
182
|
+
- vendor/datasketches-cpp/kll/test/kll_sketch_serialize_for_java.cpp
|
170
183
|
- vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp
|
171
184
|
- vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp
|
172
185
|
- vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp
|
173
|
-
- vendor/datasketches-cpp/pyproject.toml
|
174
|
-
- vendor/datasketches-cpp/python/CMakeLists.txt
|
175
|
-
- vendor/datasketches-cpp/python/README.md
|
176
|
-
- vendor/datasketches-cpp/python/datasketches/PySerDe.py
|
177
|
-
- vendor/datasketches-cpp/python/datasketches/__init__.py
|
178
|
-
- vendor/datasketches-cpp/python/include/py_serde.hpp
|
179
|
-
- vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb
|
180
|
-
- vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb
|
181
|
-
- vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb
|
182
|
-
- vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb
|
183
|
-
- vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb
|
184
|
-
- vendor/datasketches-cpp/python/pybind11Path.cmd
|
185
|
-
- vendor/datasketches-cpp/python/src/__init__.py
|
186
|
-
- vendor/datasketches-cpp/python/src/cpc_wrapper.cpp
|
187
|
-
- vendor/datasketches-cpp/python/src/datasketches.cpp
|
188
|
-
- vendor/datasketches-cpp/python/src/fi_wrapper.cpp
|
189
|
-
- vendor/datasketches-cpp/python/src/hll_wrapper.cpp
|
190
|
-
- vendor/datasketches-cpp/python/src/kll_wrapper.cpp
|
191
|
-
- vendor/datasketches-cpp/python/src/ks_wrapper.cpp
|
192
|
-
- vendor/datasketches-cpp/python/src/py_serde.cpp
|
193
|
-
- vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp
|
194
|
-
- vendor/datasketches-cpp/python/src/req_wrapper.cpp
|
195
|
-
- vendor/datasketches-cpp/python/src/theta_wrapper.cpp
|
196
|
-
- vendor/datasketches-cpp/python/src/vector_of_kll.cpp
|
197
|
-
- vendor/datasketches-cpp/python/src/vo_wrapper.cpp
|
198
|
-
- vendor/datasketches-cpp/python/tests/__init__.py
|
199
|
-
- vendor/datasketches-cpp/python/tests/cpc_test.py
|
200
|
-
- vendor/datasketches-cpp/python/tests/fi_test.py
|
201
|
-
- vendor/datasketches-cpp/python/tests/hll_test.py
|
202
|
-
- vendor/datasketches-cpp/python/tests/kll_test.py
|
203
|
-
- vendor/datasketches-cpp/python/tests/quantiles_test.py
|
204
|
-
- vendor/datasketches-cpp/python/tests/req_test.py
|
205
|
-
- vendor/datasketches-cpp/python/tests/theta_test.py
|
206
|
-
- vendor/datasketches-cpp/python/tests/vector_of_kll_test.py
|
207
|
-
- vendor/datasketches-cpp/python/tests/vo_test.py
|
208
186
|
- vendor/datasketches-cpp/quantiles/CMakeLists.txt
|
209
187
|
- vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp
|
210
188
|
- vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp
|
@@ -219,6 +197,8 @@ files:
|
|
219
197
|
- vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.3.sk
|
220
198
|
- vendor/datasketches-cpp/quantiles/test/kolmogorov_smirnov_test.cpp
|
221
199
|
- vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp
|
200
|
+
- vendor/datasketches-cpp/quantiles/test/quantiles_sketch_deserialize_from_java_test.cpp
|
201
|
+
- vendor/datasketches-cpp/quantiles/test/quantiles_sketch_serialize_for_java.cpp
|
222
202
|
- vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp
|
223
203
|
- vendor/datasketches-cpp/req/CMakeLists.txt
|
224
204
|
- vendor/datasketches-cpp/req/include/req_common.hpp
|
@@ -227,28 +207,32 @@ files:
|
|
227
207
|
- vendor/datasketches-cpp/req/include/req_sketch.hpp
|
228
208
|
- vendor/datasketches-cpp/req/include/req_sketch_impl.hpp
|
229
209
|
- vendor/datasketches-cpp/req/test/CMakeLists.txt
|
230
|
-
- vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk
|
231
|
-
- vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk
|
232
|
-
- vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk
|
233
|
-
- vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk
|
234
|
-
- vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk
|
235
210
|
- vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp
|
211
|
+
- vendor/datasketches-cpp/req/test/req_sketch_deserialize_from_java_test.cpp
|
212
|
+
- vendor/datasketches-cpp/req/test/req_sketch_serialize_for_java.cpp
|
236
213
|
- vendor/datasketches-cpp/req/test/req_sketch_test.cpp
|
237
214
|
- vendor/datasketches-cpp/sampling/CMakeLists.txt
|
215
|
+
- vendor/datasketches-cpp/sampling/include/ebpps_sample.hpp
|
216
|
+
- vendor/datasketches-cpp/sampling/include/ebpps_sample_impl.hpp
|
217
|
+
- vendor/datasketches-cpp/sampling/include/ebpps_sketch.hpp
|
218
|
+
- vendor/datasketches-cpp/sampling/include/ebpps_sketch_impl.hpp
|
238
219
|
- vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp
|
239
220
|
- vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp
|
240
221
|
- vendor/datasketches-cpp/sampling/include/var_opt_union.hpp
|
241
222
|
- vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp
|
242
223
|
- vendor/datasketches-cpp/sampling/test/CMakeLists.txt
|
243
|
-
- vendor/datasketches-cpp/sampling/test/binaries_from_java.txt
|
224
|
+
- vendor/datasketches-cpp/sampling/test/ebpps_allocation_test.cpp
|
225
|
+
- vendor/datasketches-cpp/sampling/test/ebpps_sample_test.cpp
|
226
|
+
- vendor/datasketches-cpp/sampling/test/ebpps_sketch_test.cpp
|
244
227
|
- vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp
|
228
|
+
- vendor/datasketches-cpp/sampling/test/var_opt_sketch_deserialize_from_java_test.cpp
|
229
|
+
- vendor/datasketches-cpp/sampling/test/var_opt_sketch_serialize_for_java.cpp
|
245
230
|
- vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp
|
231
|
+
- vendor/datasketches-cpp/sampling/test/var_opt_union_deserialize_from_java_test.cpp
|
232
|
+
- vendor/datasketches-cpp/sampling/test/var_opt_union_serialize_for_java.cpp
|
246
233
|
- vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp
|
247
|
-
- vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk
|
248
|
-
- vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk
|
249
|
-
- vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk
|
250
|
-
- vendor/datasketches-cpp/setup.py
|
251
234
|
- vendor/datasketches-cpp/theta/CMakeLists.txt
|
235
|
+
- vendor/datasketches-cpp/theta/include/bit_packing.hpp
|
252
236
|
- vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp
|
253
237
|
- vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_theta_sketched_sets.hpp
|
254
238
|
- vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp
|
@@ -275,30 +259,29 @@ files:
|
|
275
259
|
- vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp
|
276
260
|
- vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp
|
277
261
|
- vendor/datasketches-cpp/theta/test/CMakeLists.txt
|
262
|
+
- vendor/datasketches-cpp/theta/test/bit_packing_test.cpp
|
278
263
|
- vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp
|
279
|
-
- vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk
|
280
264
|
- vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk
|
281
265
|
- vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk
|
282
|
-
- vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk
|
283
266
|
- vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk
|
284
267
|
- vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk
|
285
|
-
- vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk
|
286
|
-
- vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk
|
287
268
|
- vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp
|
288
269
|
- vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp
|
289
270
|
- vendor/datasketches-cpp/theta/test/theta_setop_test.cpp
|
271
|
+
- vendor/datasketches-cpp/theta/test/theta_sketch_deserialize_from_java_test.cpp
|
272
|
+
- vendor/datasketches-cpp/theta/test/theta_sketch_serialize_for_java.cpp
|
290
273
|
- vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp
|
291
274
|
- vendor/datasketches-cpp/theta/test/theta_union_test.cpp
|
292
|
-
- vendor/datasketches-cpp/tox.ini
|
293
275
|
- vendor/datasketches-cpp/tuple/CMakeLists.txt
|
294
|
-
- vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp
|
295
|
-
- vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp
|
296
|
-
- vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp
|
297
|
-
- vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection_impl.hpp
|
298
276
|
- vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp
|
299
|
-
- vendor/datasketches-cpp/tuple/include/
|
300
|
-
- vendor/datasketches-cpp/tuple/include/
|
301
|
-
- vendor/datasketches-cpp/tuple/include/
|
277
|
+
- vendor/datasketches-cpp/tuple/include/array_tuple_a_not_b.hpp
|
278
|
+
- vendor/datasketches-cpp/tuple/include/array_tuple_a_not_b_impl.hpp
|
279
|
+
- vendor/datasketches-cpp/tuple/include/array_tuple_intersection.hpp
|
280
|
+
- vendor/datasketches-cpp/tuple/include/array_tuple_intersection_impl.hpp
|
281
|
+
- vendor/datasketches-cpp/tuple/include/array_tuple_sketch.hpp
|
282
|
+
- vendor/datasketches-cpp/tuple/include/array_tuple_sketch_impl.hpp
|
283
|
+
- vendor/datasketches-cpp/tuple/include/array_tuple_union.hpp
|
284
|
+
- vendor/datasketches-cpp/tuple/include/array_tuple_union_impl.hpp
|
302
285
|
- vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp
|
303
286
|
- vendor/datasketches-cpp/tuple/include/tuple_a_not_b_impl.hpp
|
304
287
|
- vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp
|
@@ -309,17 +292,16 @@ files:
|
|
309
292
|
- vendor/datasketches-cpp/tuple/include/tuple_union.hpp
|
310
293
|
- vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp
|
311
294
|
- vendor/datasketches-cpp/tuple/test/CMakeLists.txt
|
312
|
-
- vendor/datasketches-cpp/tuple/test/
|
313
|
-
- vendor/datasketches-cpp/tuple/test/
|
314
|
-
- vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk
|
315
|
-
- vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk
|
316
|
-
- vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk
|
295
|
+
- vendor/datasketches-cpp/tuple/test/aod_sketch_deserialize_from_java_test.cpp
|
296
|
+
- vendor/datasketches-cpp/tuple/test/aod_sketch_serialize_for_java.cpp
|
317
297
|
- vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp
|
318
298
|
- vendor/datasketches-cpp/tuple/test/engagement_test.cpp
|
319
299
|
- vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp
|
320
300
|
- vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp
|
321
301
|
- vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp
|
322
302
|
- vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp
|
303
|
+
- vendor/datasketches-cpp/tuple/test/tuple_sketch_deserialize_from_java_test.cpp
|
304
|
+
- vendor/datasketches-cpp/tuple/test/tuple_sketch_serialize_for_java.cpp
|
323
305
|
- vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp
|
324
306
|
- vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp
|
325
307
|
- vendor/datasketches-cpp/version.cfg.in
|
@@ -335,14 +317,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
335
317
|
requirements:
|
336
318
|
- - ">="
|
337
319
|
- !ruby/object:Gem::Version
|
338
|
-
version: '
|
320
|
+
version: '3'
|
339
321
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
340
322
|
requirements:
|
341
323
|
- - ">="
|
342
324
|
- !ruby/object:Gem::Version
|
343
325
|
version: '0'
|
344
326
|
requirements: []
|
345
|
-
rubygems_version: 3.4.
|
327
|
+
rubygems_version: 3.4.10
|
346
328
|
signing_key:
|
347
329
|
specification_version: 4
|
348
330
|
summary: Sketch data structures for Ruby
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
@@ -1,23 +0,0 @@
|
|
1
|
-
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
-
# or more contributor license agreements. See the NOTICE file
|
3
|
-
# distributed with this work for additional information
|
4
|
-
# regarding copyright ownership. The ASF licenses this file
|
5
|
-
# to you under the Apache License, Version 2.0 (the
|
6
|
-
# "License"); you may not use this file except in compliance
|
7
|
-
# with the License. You may obtain a copy of the License at
|
8
|
-
#
|
9
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
-
#
|
11
|
-
# Unless required by applicable law or agreed to in writing,
|
12
|
-
# software distributed under the License is distributed on an
|
13
|
-
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
-
# KIND, either express or implied. See the License for the
|
15
|
-
# specific language governing permissions and limitations
|
16
|
-
# under the License.
|
17
|
-
|
18
|
-
[build-system]
|
19
|
-
requires = ["wheel",
|
20
|
-
"setuptools >= 30.3.0",
|
21
|
-
"cmake >= 3.16",
|
22
|
-
"pybind11[global] >= 2.6.0"]
|
23
|
-
build-backend = "setuptools.build_meta"
|
@@ -1,81 +0,0 @@
|
|
1
|
-
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
-
# or more contributor license agreements. See the NOTICE file
|
3
|
-
# distributed with this work for additional information
|
4
|
-
# regarding copyright ownership. The ASF licenses this file
|
5
|
-
# to you under the Apache License, Version 2.0 (the
|
6
|
-
# "License"); you may not use this file except in compliance
|
7
|
-
# with the License. You may obtain a copy of the License at
|
8
|
-
#
|
9
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
-
#
|
11
|
-
# Unless required by applicable law or agreed to in writing,
|
12
|
-
# software distributed under the License is distributed on an
|
13
|
-
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
-
# KIND, either express or implied. See the License for the
|
15
|
-
# specific language governing permissions and limitations
|
16
|
-
# under the License.
|
17
|
-
|
18
|
-
if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.18.0")
|
19
|
-
find_package(Python3 COMPONENTS Interpreter Development.Module REQUIRED)
|
20
|
-
else()
|
21
|
-
find_package(Python3 COMPONENTS Interpreter Development REQUIRED)
|
22
|
-
endif()
|
23
|
-
|
24
|
-
# only Windows+MSVC seems to have trouble locating pybind11
|
25
|
-
if (MSVC)
|
26
|
-
execute_process(COMMAND cmd.exe /c ${CMAKE_CURRENT_SOURCE_DIR}/pybind11Path.cmd "${Python3_EXECUTABLE}"
|
27
|
-
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
|
28
|
-
OUTPUT_STRIP_TRAILING_WHITESPACE
|
29
|
-
OUTPUT_VARIABLE EXTRA_PACKAGE_PATH)
|
30
|
-
set(CMAKE_PREFIX_PATH ${CMAKE_PREFIX_PATH} ${EXTRA_PACKAGE_PATH})
|
31
|
-
endif()
|
32
|
-
|
33
|
-
find_package(pybind11 CONFIG REQUIRED)
|
34
|
-
|
35
|
-
pybind11_add_module(python MODULE EXCLUDE_FROM_ALL THIN_LTO)
|
36
|
-
|
37
|
-
target_link_libraries(python
|
38
|
-
PRIVATE
|
39
|
-
common
|
40
|
-
hll
|
41
|
-
kll
|
42
|
-
cpc
|
43
|
-
fi
|
44
|
-
theta
|
45
|
-
sampling
|
46
|
-
req
|
47
|
-
quantiles
|
48
|
-
pybind11::module
|
49
|
-
)
|
50
|
-
|
51
|
-
set_target_properties(python PROPERTIES
|
52
|
-
PREFIX ""
|
53
|
-
OUTPUT_NAME _datasketches
|
54
|
-
)
|
55
|
-
|
56
|
-
target_include_directories(python
|
57
|
-
PUBLIC
|
58
|
-
$<INSTALL_INTERFACE:$<INSTALL_PREFIX>/include>
|
59
|
-
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
|
60
|
-
)
|
61
|
-
|
62
|
-
# ensure we make a .so on Mac rather than .dylib
|
63
|
-
if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
|
64
|
-
set_target_properties(python PROPERTIES SUFFIX ".so")
|
65
|
-
endif()
|
66
|
-
|
67
|
-
target_sources(python
|
68
|
-
PRIVATE
|
69
|
-
src/datasketches.cpp
|
70
|
-
src/hll_wrapper.cpp
|
71
|
-
src/kll_wrapper.cpp
|
72
|
-
src/cpc_wrapper.cpp
|
73
|
-
src/fi_wrapper.cpp
|
74
|
-
src/theta_wrapper.cpp
|
75
|
-
src/vo_wrapper.cpp
|
76
|
-
src/req_wrapper.cpp
|
77
|
-
src/quantiles_wrapper.cpp
|
78
|
-
src/ks_wrapper.cpp
|
79
|
-
src/vector_of_kll.cpp
|
80
|
-
src/py_serde.cpp
|
81
|
-
)
|
@@ -1,85 +0,0 @@
|
|
1
|
-
<img src="https://raw.githubusercontent.com/apache/datasketches-website/master/logos/svg/datasketches-HorizontalColor-TM.svg" width="75%" alt="Apache DataSketches Logo">
|
2
|
-
|
3
|
-
# The Apache DataSketches Library for Python
|
4
|
-
|
5
|
-
This is the official version of the [Apache DataSketches](https://datasketches.apache.org) Python library.
|
6
|
-
|
7
|
-
In the analysis of big data there are often problem queries that don’t scale because they require huge compute resources and time to generate exact results. Examples include count distinct, quantiles, most-frequent items, joins, matrix computations, and graph analysis.
|
8
|
-
|
9
|
-
If approximate results are acceptable, there is a class of specialized algorithms, called streaming algorithms or sketches, that can produce results orders of magnitude faster and with mathematically proven error bounds. For interactive queries there may not be other viable alternatives, and in the case of real-time analysis, sketches are the only known solution.
|
10
|
-
|
11
|
-
This package provides a variety of sketches as described below. Wherever a specific type of sketch exists in Apache DataSketches packages for other languages, the sketches will be portable between languages (for platforms with the same endianness).
|
12
|
-
|
13
|
-
## Building and Installation
|
14
|
-
|
15
|
-
Once cloned, the library can be installed by running `python3 -m pip install .` in the project root directory -- not the python subdirectory -- which will also install the necessary dependencies, namely numpy and [pybind11[global]](https://github.com/pybind/pybind11).
|
16
|
-
|
17
|
-
If you prefer to call the `setup.py` build script directly, which is discouraged, you must first install `pybind11[global]`, as well as any other dependencies listed under the build-system section in `pyproject.toml`.
|
18
|
-
|
19
|
-
The library is also available from PyPI via `python3 -m pip install datasketches`.
|
20
|
-
|
21
|
-
## Usage
|
22
|
-
|
23
|
-
Having installed the library, loading the Apache DataSketches Library in Python is simple: `import datasketches`.
|
24
|
-
|
25
|
-
The unit tests are mostly structured in a tutorial style and can be used as a reference example for how to feed data into and query the different types of sketches.
|
26
|
-
|
27
|
-
## Available Sketch Classes
|
28
|
-
|
29
|
-
- KLL (Absolute Error Quantiles)
|
30
|
-
- `kll_ints_sketch`
|
31
|
-
- `kll_floats_sketch`
|
32
|
-
- `kll_doubles_sketch`
|
33
|
-
- Quantiles (Absolute Error Quantiles, inferior algorithm)
|
34
|
-
- `quantiles_ints_sketch`
|
35
|
-
- `quantiles_floats_sketch`
|
36
|
-
- `quantiles_doubles_sketch`
|
37
|
-
- REQ (Relative Error Quantiles)
|
38
|
-
- `req_ints_sketch`
|
39
|
-
- `req_floats_sketch`
|
40
|
-
- Frequent Items
|
41
|
-
- `frequent_strings_sketch`
|
42
|
-
- Error types are `frequent_items_error_type.{NO_FALSE_NEGATIVES | NO_FALSE_POSITIVES}`
|
43
|
-
- Theta
|
44
|
-
- `update_theta_sketch`
|
45
|
-
- `compact_theta_sketch` (cannot be instantiated directly)
|
46
|
-
- `theta_union`
|
47
|
-
- `theta_intersection`
|
48
|
-
- `theta_a_not_b`
|
49
|
-
- HLL
|
50
|
-
- `hll_sketch`
|
51
|
-
- `hll_union`
|
52
|
-
- Target HLL types are `tgt_hll_type.{HLL_4 | HLL_6 | HLL_8}`
|
53
|
-
- CPC
|
54
|
-
- `cpc_sketch`
|
55
|
-
- `cpc_union`
|
56
|
-
- VarOpt Sampling
|
57
|
-
- `var_opt_sketch`
|
58
|
-
- `var_opt_union`
|
59
|
-
- Vector of KLL
|
60
|
-
- `vector_of_kll_ints_sketches`
|
61
|
-
- `vector_of_kll_floats_sketches`
|
62
|
-
- Kolmogorov-Smirnov Test
|
63
|
-
- `ks_test` applied to a pair of matched-type Absolute Error quantiles sketches
|
64
|
-
|
65
|
-
## Known Differences from C++
|
66
|
-
|
67
|
-
The Python API largely mirrors the C++ API, with a few minor exceptions: The primary known differences are that Python on modern platforms does not support unsigned integer values or numeric values with fewer than 64 bits. As a result, you may not be able to produce identical sketches from within Python as you can with Java and C++. Loading those sketches after they have been serialized from another language will work as expected.
|
68
|
-
|
69
|
-
The Vector of KLL object is currently exclusive to python, and holds an array of independent KLL sketches. This is useful for creating a set of KLL sketches over a vector and has been designed to allow input as either a vector or a matrix of multiple vectors.
|
70
|
-
|
71
|
-
We have also removed reliance on a builder class for theta sketches as Python allows named arguments to the constructor, not strictly positional arguments.
|
72
|
-
|
73
|
-
## Developer Instructions
|
74
|
-
|
75
|
-
The only developer-specific instructions relate to running unit tests.
|
76
|
-
|
77
|
-
### Unit tests
|
78
|
-
|
79
|
-
The Python unit tests are run via `tox`, with no arguments, from the project root directory -- not the python subdirectory. Tox creates a temporary virtual environment in which to build and run the unit tests. In the event you are missing the necessary package, tox may be installed with `python3 -m pip install --upgrade tox`.
|
80
|
-
|
81
|
-
## License
|
82
|
-
|
83
|
-
The Apache DataSketches Library is distributed under an Apache 2.0 License.
|
84
|
-
|
85
|
-
Precompiled binaries may be provided as a convenience and distributed through PyPI via [https://pypi.org/project/datasketches/]. These binaries contain compiled code from [pybind11](https://github.com/pybind/pybind11), which is distributed under a BSD license.
|
@@ -1,104 +0,0 @@
|
|
1
|
-
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
-
# or more contributor license agreements. See the NOTICE file
|
3
|
-
# distributed with this work for additional information
|
4
|
-
# regarding copyright ownership. The ASF licenses this file
|
5
|
-
# to you under the Apache License, Version 2.0 (the
|
6
|
-
# "License"); you may not use this file except in compliance
|
7
|
-
# with the License. You may obtain a copy of the License at
|
8
|
-
#
|
9
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
-
#
|
11
|
-
# Unless required by applicable law or agreed to in writing,
|
12
|
-
# software distributed under the License is distributed on an
|
13
|
-
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
-
# KIND, either express or implied. See the License for the
|
15
|
-
# specific language governing permissions and limitations
|
16
|
-
# under the License.
|
17
|
-
|
18
|
-
from _datasketches import PyObjectSerDe
|
19
|
-
|
20
|
-
import struct
|
21
|
-
|
22
|
-
# This file provides several Python SerDe implementation examples.
|
23
|
-
#
|
24
|
-
# Each implementation must extend the PyObjectSerDe class and define
|
25
|
-
# three methods:
|
26
|
-
# * get_size(item) returns an int of the number of bytes needed to
|
27
|
-
# serialize the given item
|
28
|
-
# * to_bytes(item) returns a bytes object representing a serialized
|
29
|
-
# version of the given item
|
30
|
-
# * from_bytes(data, offset) takes a bytes object (data) and an offset
|
31
|
-
# indicating where in the data array to start reading. The method
|
32
|
-
# returns a tuple with the newly reconstructed object and the
|
33
|
-
# total number of bytes beyond the offset read from the input data.
|
34
|
-
|
35
|
-
# Implements a simple string-encoding scheme where a string is
|
36
|
-
# written as <num_bytes> <string_contents>, with no null termination.
|
37
|
-
# This format allows pre-allocating each string, at the cost of
|
38
|
-
# additional storage. Using this format, the serialized string consumes
|
39
|
-
# 4 + len(item) bytes.
|
40
|
-
class PyStringsSerDe(PyObjectSerDe):
|
41
|
-
def get_size(self, item):
|
42
|
-
return int(4 + len(item))
|
43
|
-
|
44
|
-
def to_bytes(self, item: str):
|
45
|
-
b = bytearray()
|
46
|
-
b.extend(len(item).to_bytes(4, 'little'))
|
47
|
-
b.extend(map(ord,item))
|
48
|
-
return bytes(b)
|
49
|
-
|
50
|
-
def from_bytes(self, data: bytes, offset: int):
|
51
|
-
num_chars = int.from_bytes(data[offset:offset+3], 'little')
|
52
|
-
if (num_chars < 0 or num_chars > offset + len(data)):
|
53
|
-
raise IndexError(f'num_chars read must be non-negative and not larger than the buffer. Found {num_chars}')
|
54
|
-
str = data[offset+4:offset+4+num_chars].decode()
|
55
|
-
return (str, 4+num_chars)
|
56
|
-
|
57
|
-
# Implements an integer-encoding scheme where each integer is written
|
58
|
-
# as a 32-bit (4 byte) little-endian value.
|
59
|
-
class PyIntsSerDe(PyObjectSerDe):
|
60
|
-
def get_size(self, item):
|
61
|
-
return int(4)
|
62
|
-
|
63
|
-
def to_bytes(self, item):
|
64
|
-
return struct.pack('i', item)
|
65
|
-
|
66
|
-
def from_bytes(self, data: bytes, offset: int):
|
67
|
-
val = struct.unpack_from('i', data, offset)[0]
|
68
|
-
return (val, 4)
|
69
|
-
|
70
|
-
|
71
|
-
class PyLongsSerDe(PyObjectSerDe):
|
72
|
-
def get_size(self, item):
|
73
|
-
return int(8)
|
74
|
-
|
75
|
-
def to_bytes(self, item):
|
76
|
-
return struct.pack('l', item)
|
77
|
-
|
78
|
-
def from_bytes(self, data: bytes, offset: int):
|
79
|
-
val = struct.unpack_from('l', data, offset)[0]
|
80
|
-
return (val, 8)
|
81
|
-
|
82
|
-
|
83
|
-
class PyFloatsSerDe(PyObjectSerDe):
|
84
|
-
def get_size(self, item):
|
85
|
-
return int(4)
|
86
|
-
|
87
|
-
def to_bytes(self, item):
|
88
|
-
return struct.pack('f', item)
|
89
|
-
|
90
|
-
def from_bytes(self, data: bytes, offset: int):
|
91
|
-
val = struct.unpack_from('f', data, offset)[0]
|
92
|
-
return (val, 4)
|
93
|
-
|
94
|
-
|
95
|
-
class PyDoublesSerDe(PyObjectSerDe):
|
96
|
-
def get_size(self, item):
|
97
|
-
return int(8)
|
98
|
-
|
99
|
-
def to_bytes(self, item):
|
100
|
-
return struct.pack('d', item)
|
101
|
-
|
102
|
-
def from_bytes(self, data: bytes, offset: int):
|
103
|
-
val = struct.unpack_from('d', data, offset)[0]
|
104
|
-
return (val, 8)
|