datasketches 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE +310 -0
- data/NOTICE +11 -0
- data/README.md +126 -0
- data/ext/datasketches/cpc_wrapper.cpp +50 -0
- data/ext/datasketches/ext.cpp +12 -0
- data/ext/datasketches/extconf.rb +11 -0
- data/ext/datasketches/hll_wrapper.cpp +69 -0
- data/lib/datasketches.rb +9 -0
- data/lib/datasketches/version.rb +3 -0
- data/vendor/datasketches-cpp/CMakeLists.txt +126 -0
- data/vendor/datasketches-cpp/LICENSE +311 -0
- data/vendor/datasketches-cpp/MANIFEST.in +19 -0
- data/vendor/datasketches-cpp/NOTICE +11 -0
- data/vendor/datasketches-cpp/README.md +42 -0
- data/vendor/datasketches-cpp/common/CMakeLists.txt +45 -0
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +173 -0
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +458 -0
- data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +291 -0
- data/vendor/datasketches-cpp/common/include/ceiling_power_of_2.hpp +41 -0
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +51 -0
- data/vendor/datasketches-cpp/common/include/conditional_back_inserter.hpp +68 -0
- data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +70 -0
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +114 -0
- data/vendor/datasketches-cpp/common/include/inv_pow2_table.hpp +107 -0
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +57 -0
- data/vendor/datasketches-cpp/common/include/serde.hpp +196 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +38 -0
- data/vendor/datasketches-cpp/common/test/catch.hpp +17618 -0
- data/vendor/datasketches-cpp/common/test/catch_runner.cpp +7 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.cpp +31 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +108 -0
- data/vendor/datasketches-cpp/common/test/test_runner.cpp +29 -0
- data/vendor/datasketches-cpp/common/test/test_type.hpp +137 -0
- data/vendor/datasketches-cpp/cpc/CMakeLists.txt +74 -0
- data/vendor/datasketches-cpp/cpc/include/compression_data.hpp +6022 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +62 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +147 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +742 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +167 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +311 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +810 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +102 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +346 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +137 -0
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +274 -0
- data/vendor/datasketches-cpp/cpc/include/kxp_byte_lookup.hpp +81 -0
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +84 -0
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +266 -0
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +67 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +381 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +149 -0
- data/vendor/datasketches-cpp/fi/CMakeLists.txt +54 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +319 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +484 -0
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +114 -0
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +345 -0
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +84 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +360 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +47 -0
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +92 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +303 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +83 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +811 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +40 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +291 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +59 -0
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +417 -0
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +91 -0
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +233 -0
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +43 -0
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +90 -0
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +48 -0
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +335 -0
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +69 -0
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +124 -0
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +55 -0
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +158 -0
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +56 -0
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +706 -0
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +136 -0
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +462 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +149 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +85 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +170 -0
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +287 -0
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +239 -0
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables-internal.hpp +112 -0
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +46 -0
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +56 -0
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +43 -0
- data/vendor/datasketches-cpp/hll/include/hll.hpp +669 -0
- data/vendor/datasketches-cpp/hll/include/hll.private.hpp +32 -0
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +79 -0
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +51 -0
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +130 -0
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +181 -0
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +93 -0
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +191 -0
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +389 -0
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +313 -0
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +141 -0
- data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +44 -0
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +168 -0
- data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/CMakeLists.txt +58 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +150 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +319 -0
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +67 -0
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +169 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +559 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +1131 -0
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +154 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +685 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +229 -0
- data/vendor/datasketches-cpp/pyproject.toml +17 -0
- data/vendor/datasketches-cpp/python/CMakeLists.txt +61 -0
- data/vendor/datasketches-cpp/python/README.md +78 -0
- data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +345 -0
- data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +354 -0
- data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +346 -0
- data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +463 -0
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +396 -0
- data/vendor/datasketches-cpp/python/src/__init__.py +2 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +90 -0
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +40 -0
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +123 -0
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +136 -0
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +209 -0
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +162 -0
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +488 -0
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +140 -0
- data/vendor/datasketches-cpp/python/tests/__init__.py +0 -0
- data/vendor/datasketches-cpp/python/tests/cpc_test.py +64 -0
- data/vendor/datasketches-cpp/python/tests/fi_test.py +110 -0
- data/vendor/datasketches-cpp/python/tests/hll_test.py +131 -0
- data/vendor/datasketches-cpp/python/tests/kll_test.py +119 -0
- data/vendor/datasketches-cpp/python/tests/theta_test.py +121 -0
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +148 -0
- data/vendor/datasketches-cpp/python/tests/vo_test.py +101 -0
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +48 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +392 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +1752 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +239 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +645 -0
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +43 -0
- data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +67 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +509 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +358 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
- data/vendor/datasketches-cpp/setup.py +94 -0
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +57 -0
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +73 -0
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +83 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +88 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +130 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +533 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +939 -0
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +122 -0
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +109 -0
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +45 -0
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +244 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +218 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +438 -0
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +97 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +104 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp +52 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp +32 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +52 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection_impl.hpp +31 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +179 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +238 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +81 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_sampled_sets.hpp +135 -0
- data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_theta_sketched_sets.hpp +135 -0
- data/vendor/datasketches-cpp/tuple/include/jaccard_similarity.hpp +172 -0
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +53 -0
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental_impl.hpp +33 -0
- data/vendor/datasketches-cpp/tuple/include/theta_comparators.hpp +48 -0
- data/vendor/datasketches-cpp/tuple/include/theta_constants.hpp +34 -0
- data/vendor/datasketches-cpp/tuple/include/theta_helpers.hpp +54 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_base.hpp +59 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_base_impl.hpp +121 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +78 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base.hpp +54 -0
- data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base_impl.hpp +80 -0
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +393 -0
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +481 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_base.hpp +60 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_base_impl.hpp +84 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +88 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +47 -0
- data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base.hpp +259 -0
- data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base_impl.hpp +389 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +57 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b_impl.hpp +33 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +104 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +496 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +587 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +109 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +47 -0
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +53 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +1 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +1 -0
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +298 -0
- data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +250 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +224 -0
- data/vendor/datasketches-cpp/tuple/test/theta_jaccard_similarity_test.cpp +144 -0
- data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +247 -0
- data/vendor/datasketches-cpp/tuple/test/theta_union_experimental_test.cpp +44 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +289 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +235 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +98 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +102 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +249 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +187 -0
- metadata +302 -0
|
@@ -0,0 +1,358 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#include <var_opt_union.hpp>
|
|
21
|
+
#include "test_type.hpp"
|
|
22
|
+
|
|
23
|
+
#include <catch.hpp>
|
|
24
|
+
|
|
25
|
+
#include <vector>
|
|
26
|
+
#include <string>
|
|
27
|
+
#include <sstream>
|
|
28
|
+
#include <fstream>
|
|
29
|
+
#include <cmath>
|
|
30
|
+
#include <random>
|
|
31
|
+
|
|
32
|
+
// Prefix prepended to the file names of the binary test fixtures.
// A build can override it by defining TEST_BINARY_INPUT_PATH; without that
// definition the relative directory "test/" is used.
#ifndef TEST_BINARY_INPUT_PATH
static std::string testBinaryInputPath = "test/";
#else
static std::string testBinaryInputPath = TEST_BINARY_INPUT_PATH;
#endif
|
|
37
|
+
|
|
38
|
+
namespace datasketches {
|
|
39
|
+
|
|
40
|
+
// Absolute tolerance passed to Approx(...).margin() when the tests compare floating-point weights.
static constexpr double EPS = 1e-13;
|
|
41
|
+
|
|
42
|
+
// Returns a var_opt_sketch<int> constructed with k and updated with the
// values 0 .. n-1, each given a weight of 1.0.
static var_opt_sketch<int> create_unweighted_sketch(uint32_t k, uint64_t n) {
  var_opt_sketch<int> sketch(k);
  uint64_t item = 0;
  while (item < n) {
    sketch.update(item, 1.0);
    ++item;
  }
  return sketch;
}
|
|
49
|
+
|
|
50
|
+
// if exact_compare = false, checks for equivalence -- specific R region values may differ but
|
|
51
|
+
// R region weights must match
|
|
52
|
+
template<typename T, typename S, typename A>
|
|
53
|
+
static void check_if_equal(var_opt_sketch<T,S,A>& sk1, var_opt_sketch<T,S,A>& sk2, bool exact_compare = true) {
|
|
54
|
+
REQUIRE(sk1.get_k() == sk2.get_k());
|
|
55
|
+
REQUIRE(sk1.get_n() == sk2.get_n());
|
|
56
|
+
REQUIRE(sk1.get_num_samples() == sk2.get_num_samples());
|
|
57
|
+
|
|
58
|
+
auto it1 = sk1.begin();
|
|
59
|
+
auto it2 = sk2.begin();
|
|
60
|
+
size_t i = 0;
|
|
61
|
+
|
|
62
|
+
while ((it1 != sk1.end()) && (it2 != sk2.end())) {
|
|
63
|
+
const std::pair<const T&, const double> p1 = *it1;
|
|
64
|
+
const std::pair<const T&, const double> p2 = *it2;
|
|
65
|
+
if (exact_compare) {
|
|
66
|
+
REQUIRE(p1.first == p2.first); // data values
|
|
67
|
+
}
|
|
68
|
+
REQUIRE(p1.second == p2.second); // weight values
|
|
69
|
+
++i;
|
|
70
|
+
++it1;
|
|
71
|
+
++it2;
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
REQUIRE((it1 == sk1.end() && it2 == sk2.end())); // iterators must end at the same time
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
// compare serialization and deserialization results, checking string and stream methods to
|
|
78
|
+
// ensure that the resulting binary images are compatible.
|
|
79
|
+
// if exact_compare = false, checks for equivalence -- specific R region values may differ but
|
|
80
|
+
// R region weights must match
|
|
81
|
+
template<typename T, typename S, typename A>
|
|
82
|
+
static void compare_serialization_deserialization(var_opt_union<T,S,A>& vo_union, bool exact_compare = true) {
|
|
83
|
+
std::vector<uint8_t> bytes = vo_union.serialize();
|
|
84
|
+
|
|
85
|
+
var_opt_union<T> u_from_bytes = var_opt_union<T>::deserialize(bytes.data(), bytes.size());
|
|
86
|
+
var_opt_sketch<T> sk1 = vo_union.get_result();
|
|
87
|
+
var_opt_sketch<T> sk2 = u_from_bytes.get_result();
|
|
88
|
+
check_if_equal(sk1, sk2, exact_compare);
|
|
89
|
+
|
|
90
|
+
std::string str(bytes.begin(), bytes.end());
|
|
91
|
+
std::stringstream ss;
|
|
92
|
+
ss.str(str);
|
|
93
|
+
|
|
94
|
+
var_opt_union<T> u_from_stream = var_opt_union<T>::deserialize(ss);
|
|
95
|
+
sk2 = u_from_stream.get_result();
|
|
96
|
+
check_if_equal(sk1, sk2, exact_compare);
|
|
97
|
+
|
|
98
|
+
ss.seekg(0); // didn't put anything so only reset read position
|
|
99
|
+
vo_union.serialize(ss);
|
|
100
|
+
u_from_stream = var_opt_union<T>::deserialize(ss);
|
|
101
|
+
sk2 = u_from_stream.get_result();
|
|
102
|
+
check_if_equal(sk1, sk2, exact_compare);
|
|
103
|
+
|
|
104
|
+
std::string str_from_stream = ss.str();
|
|
105
|
+
var_opt_union<T> u_from_str = var_opt_union<T>::deserialize(str_from_stream.c_str(), str_from_stream.size());
|
|
106
|
+
sk2 = u_from_str.get_result();
|
|
107
|
+
check_if_equal(sk1, sk2, exact_compare);
|
|
108
|
+
|
|
109
|
+
// check truncated input, too
|
|
110
|
+
REQUIRE_THROWS_AS(var_opt_union<T>::deserialize(bytes.data(), bytes.size() - 5), std::out_of_range);
|
|
111
|
+
std::string str_trunc((char*)&bytes[0], bytes.size() - 5);
|
|
112
|
+
ss.str(str_trunc);
|
|
113
|
+
// next line may throw either std::illegal_argument or std::runtime_exception
|
|
114
|
+
REQUIRE_THROWS_AS(var_opt_union<T>::deserialize(ss), std::exception);
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
// Zeroes byte 0 of a serialized union and expects both the byte-array and the
// stream deserializer to reject the image with std::invalid_argument.
TEST_CASE("varopt union: bad prelongs", "[var_opt_union]") {
  var_opt_union<int> u(32);
  var_opt_sketch<int> sk = create_unweighted_sketch(32, 33);
  u.update(sk);

  std::vector<uint8_t> image = u.serialize();
  image[0] = 0; // corrupt the preamble longs byte to be too small
  REQUIRE_THROWS_AS(var_opt_union<int>::deserialize(image.data(), image.size()), std::invalid_argument);

  // same corrupted image, this time read through a stringstream
  const std::string image_str(image.begin(), image.end());
  std::stringstream corrupted(image_str);
  REQUIRE_THROWS_AS(var_opt_union<int>::deserialize(corrupted), std::invalid_argument);
}
|
|
132
|
+
|
|
133
|
+
// Zeroes byte 1 of a serialized union and expects both the byte-array and the
// stream deserializer to reject the image with std::invalid_argument.
TEST_CASE("varopt union: bad serialization version", "[var_opt_union]") {
  var_opt_union<int> u(32);
  var_opt_sketch<int> sk = create_unweighted_sketch(16, 16);
  u.update(sk);

  std::vector<uint8_t> image = u.serialize();
  image[1] = 0; // corrupt the serialization version byte
  REQUIRE_THROWS_AS(var_opt_union<int>::deserialize(image.data(), image.size()), std::invalid_argument);

  // same corrupted image, this time read through a stringstream
  const std::string image_str(image.begin(), image.end());
  std::stringstream corrupted(image_str);
  REQUIRE_THROWS_AS(var_opt_union<int>::deserialize(corrupted), std::invalid_argument);
}
|
|
148
|
+
|
|
149
|
+
// Constructing a union with k = 0 or k = 2^31 must throw std::invalid_argument.
TEST_CASE("varopt union: invalid k", "[var_opt_union]") {
  REQUIRE_THROWS_AS(var_opt_union<int>(0), std::invalid_argument);
  // shift an unsigned literal: 1 << 31 on a 32-bit signed int shifts into the sign bit
  REQUIRE_THROWS_AS(var_opt_union<int>(1U << 31), std::invalid_argument);
}
|
|
153
|
+
|
|
154
|
+
// Zeroes byte 2 of a serialized union and expects both the byte-array and the
// stream deserializer to reject the image with std::invalid_argument.
TEST_CASE("varopt union: bad family", "[var_opt_union]") {
  var_opt_union<int> u(15);
  var_opt_sketch<int> sk = create_unweighted_sketch(16, 16);
  u.update(sk);

  std::vector<uint8_t> image = u.serialize();
  image[2] = 0; // corrupt the family byte
  REQUIRE_THROWS_AS(var_opt_union<int>::deserialize(image.data(), image.size()), std::invalid_argument);

  // same corrupted image, this time read through a stringstream
  const std::string image_str(image.begin(), image.end());
  std::stringstream corrupted(image_str);
  REQUIRE_THROWS_AS(var_opt_union<int>::deserialize(corrupted), std::invalid_argument);
}
|
|
168
|
+
|
|
169
|
+
// Feeding an empty sketch into a union must yield an empty result that still reports k.
TEST_CASE("varopt union: empty union", "[var_opt_union]") {
  const uint32_t k = 2048;
  var_opt_union<std::string> u(k);
  var_opt_sketch<std::string> empty_sketch(k);
  u.update(empty_sketch);

  var_opt_sketch<std::string> merged = u.get_result();
  REQUIRE(merged.is_empty());
  REQUIRE(merged.get_n() == 0);
  REQUIRE(merged.get_num_samples() == 0);
  REQUIRE(merged.get_k() == k);
}
|
|
181
|
+
|
|
182
|
+
// Unions two sketches holding n items each (2n < k) and checks n and k of the result.
TEST_CASE("varopt union: two exact sketches", "[var_opt_union]") {
  const uint32_t k = 10;
  const uint64_t n = 4; // 2n < k
  var_opt_sketch<int> sk1(k);
  var_opt_sketch<int> sk2(k);

  // sk1 receives 1..n, sk2 the negated values; the weight equals the loop value
  uint64_t i = 1;
  while (i <= n) {
    sk1.update(i, i);
    sk2.update(static_cast<int64_t>(-i), i);
    ++i;
  }

  var_opt_union<int> u(k);
  u.update(sk1);
  u.update(sk2);

  var_opt_sketch<int> merged = u.get_result();
  REQUIRE(merged.get_n() == 2 * n);
  REQUIRE(merged.get_k() == k);
}
|
|
200
|
+
|
|
201
|
+
// Unions a k1-sized sketch of light items with a k2-sized sketch of heavy items
// and expects the result to report k2; after reset() the result must be empty
// and report k1 again.
TEST_CASE("varopt union: heavy sampling sketch", "[var_opt_union]") {
  const uint32_t k1 = 10;
  const uint32_t k2 = 5;
  const uint64_t n1 = 20;
  const uint64_t n2 = 6;
  var_opt_sketch<int64_t> sk1(k1);
  var_opt_sketch<int64_t> sk2(k2);

  uint64_t i = 1;
  while (i <= n1) {
    sk1.update(i, i);
    ++i;
  }

  // only n2 - 1 items here; we'll add a very heavy one later
  i = 1;
  while (i < n2) {
    sk2.update(static_cast<int64_t>(-i), i + 1000.0);
    ++i;
  }
  sk2.update(-n2, 1000000.0);

  var_opt_union<int64_t> u(k1);
  u.update(sk1);
  u.update(sk2);

  var_opt_sketch<int64_t> merged = u.get_result();
  REQUIRE(merged.get_n() == n1 + n2);
  REQUIRE(merged.get_k() == k2); // heavy enough the result pulls back to k2

  u.reset();
  merged = u.get_result();
  REQUIRE(merged.get_n() == 0);
  REQUIRE(merged.get_k() == k1); // union reset so empty result reflects max_k
}
|
|
229
|
+
|
|
230
|
+
// Feeds the same sampling sketch into a union twice, then a third smaller one,
// checking n and the total sketch weight of the result after each stage.
TEST_CASE("varopt union: identical sampling sketches", "[var_opt_union]") {
  const uint32_t k = 20;
  const uint64_t n = 50;
  const auto accept_all = [](int){return true;};

  var_opt_sketch<int> sk = create_unweighted_sketch(k, n);
  var_opt_union<int> u(k);
  u.update(sk);
  u.update(sk);

  var_opt_sketch<int> merged = u.get_result();
  double expected_wt = 2.0 * n;
  subset_summary summary = merged.estimate_subset_sum(accept_all);
  REQUIRE(merged.get_n() == 2 * n);
  REQUIRE(summary.total_sketch_weight == Approx(expected_wt).margin(EPS));

  // add another sketch, such that sketch_tau < outer_tau
  sk = create_unweighted_sketch(k, k + 1); // tau = (k + 1) / k
  u.update(sk);
  merged = u.get_result();
  expected_wt = (2.0 * n) + k + 1;
  summary = merged.estimate_subset_sum(accept_all);
  REQUIRE(merged.get_n() == (2 * n) + k + 1);
  REQUIRE(summary.total_sketch_weight == Approx(expected_wt).margin(EPS));
}
|
|
254
|
+
|
|
255
|
+
// Unions two k_small-sized sketches (one of them carrying a single heavy item)
// into a k_max-sized union, checks the subset-sum estimates of the result and
// then runs the serialization round-trip comparison on the union.
TEST_CASE("varopt union: small sampling sketch", "[var_opt_union]") {
  const uint32_t k_small = 16;
  const uint32_t k_max = 128;
  const uint64_t n1 = 32;
  const uint64_t n2 = 64;

  var_opt_sketch<float> sk(k_small);
  for (uint64_t i = 0; i < n1; ++i) {
    sk.update(i);
  }
  sk.update(-1, n1 * n1); // add a heavy item

  var_opt_union<float> u(k_max);
  u.update(sk);

  // another one, but different n to get a different per-item weight
  var_opt_sketch<float> sk2(k_small);
  for (uint64_t i = 0; i < n2; ++i) {
    sk2.update(i);
  }
  u.update(sk2);

  // should trigger migrate_marked_items_by_decreasing_k()
  var_opt_sketch<float> merged = u.get_result();
  REQUIRE(merged.get_n() == n1 + n2 + 1);

  const double expected_wt = 1.0 * (n1 + n2); // n1 + n2 light items, ignore the heavy one
  const auto non_negative = [](float x){return x >= 0;};
  subset_summary summary = merged.estimate_subset_sum(non_negative);
  REQUIRE(summary.estimate == Approx(expected_wt).margin(EPS));
  REQUIRE(summary.total_sketch_weight == Approx(expected_wt + (n1 * n1)).margin(EPS));
  REQUIRE(merged.get_k() < k_max);

  // check that mark information is preserved as expected
  compare_serialization_deserialization(u, false);
}
|
|
286
|
+
|
|
287
|
+
// Runs the serialization round-trip comparison on a union that was never updated.
TEST_CASE("varopt union: serialize empty", "[var_opt_union]") {
  var_opt_union<std::string> empty_union(100);
  compare_serialization_deserialization(empty_union);
}
|
|
291
|
+
|
|
292
|
+
// Runs the serialization round-trip comparison on a union fed with a sketch of k / 2 items.
TEST_CASE("varopt union: serialize exact", "[var_opt_union]") {
  const uint32_t k = 100;
  var_opt_union<int> u(k);

  var_opt_sketch<int> half_full = create_unweighted_sketch(k, k / 2);
  u.update(half_full);

  compare_serialization_deserialization(u);
}
|
|
300
|
+
|
|
301
|
+
// Runs the serialization round-trip comparison on a union fed with a sketch of 2 * k items.
TEST_CASE("varopt union: serialize sampling", "[var_opt_union]") {
  const uint32_t k = 100;
  var_opt_union<int> u(k);

  var_opt_sketch<int> overfull = create_unweighted_sketch(k, 2 * k);
  u.update(overfull);

  compare_serialization_deserialization(u);
}
|
|
309
|
+
|
|
310
|
+
// Loads the fixture varopt_union_double_sampling.sk and checks n, the
// subset-sum estimates and k of the union's result.
TEST_CASE("varopt union: deserialize from java", "[var_opt_union]") {
  const std::string fixture = testBinaryInputPath + "varopt_union_double_sampling.sk";

  std::ifstream is;
  // enable exceptions before opening so a missing fixture fails loudly
  is.exceptions(std::ios::failbit | std::ios::badbit);
  is.open(fixture, std::ios::binary);
  var_opt_union<double> u = var_opt_union<double>::deserialize(is);

  // must reduce k in the process, like in small_sampling_sketch()
  var_opt_sketch<double> merged = u.get_result();
  REQUIRE_FALSE(merged.is_empty());
  REQUIRE(merged.get_n() == 97);

  const double expected_wt = 96.0; // light items -- ignoring the heavy one
  const auto non_negative = [](double x){return x >= 0;};
  subset_summary summary = merged.estimate_subset_sum(non_negative);
  REQUIRE(summary.estimate == Approx(expected_wt).margin(EPS));
  REQUIRE(summary.total_sketch_weight == Approx(expected_wt + 1024.0).margin(EPS));
  REQUIRE(merged.get_k() < 128);
}
|
|
327
|
+
|
|
328
|
+
// Exercises the rvalue update overload, the move constructor and the move
// assignment operator of var_opt_union, checking the item count each time.
TEST_CASE("varopt union: move", "[var_opt_union][test_type]") {
  uint32_t n = 20;
  uint32_t k = 5;
  var_opt_union<test_type> u(k);
  var_opt_sketch<test_type> ascending(k);
  var_opt_sketch<test_type> descending(k);

  // fill both sketches with n items each: i in one, -i in the other
  const int item_count = static_cast<int>(n);
  for (int i = 0; i < item_count; ++i) {
    ascending.update(i);
    descending.update(-i);
  }
  REQUIRE(ascending.get_n() == n);
  REQUIRE(descending.get_n() == n);

  // move both sketches into the union
  u.update(std::move(descending));
  u.update(std::move(ascending));
  REQUIRE(u.get_result().get_n() == 2 * n);

  // move constructor
  var_opt_union<test_type> moved_by_ctor(std::move(u));
  REQUIRE(moved_by_ctor.get_result().get_n() == 2 * n);

  // move assignment
  var_opt_union<test_type> moved_by_assign(k);
  moved_by_assign = std::move(moved_by_ctor);
  REQUIRE(moved_by_assign.get_result().get_n() == 2 * n);
}
|
|
357
|
+
|
|
358
|
+
}
|
|
Binary file
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
|
3
|
+
# distributed with this work for additional information
|
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
|
6
|
+
# "License"); you may not use this file except in compliance
|
|
7
|
+
# with the License. You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
|
12
|
+
# software distributed under the License is distributed on an
|
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
+
# KIND, either express or implied. See the License for the
|
|
15
|
+
# specific language governing permissions and limitations
|
|
16
|
+
# under the License.
|
|
17
|
+
|
|
18
|
+
# Modified from:
|
|
19
|
+
# http://www.benjack.io/2018/02/02/python-cpp-revisited.html
|
|
20
|
+
|
|
21
|
+
import os
|
|
22
|
+
import sys
|
|
23
|
+
import sysconfig
|
|
24
|
+
import platform
|
|
25
|
+
import subprocess
|
|
26
|
+
|
|
27
|
+
from setuptools import setup, find_packages, Extension
|
|
28
|
+
from setuptools.command.build_ext import build_ext
|
|
29
|
+
|
|
30
|
+
class CMakeExtension(Extension):
    """Extension entry that carries a source directory instead of source files.

    The ``sources`` list passed to ``Extension`` is always empty; the only
    extra state is ``sourcedir``, stored as an absolute path.
    """

    def __init__(self, name, sourcedir=''):
        super().__init__(name, sources=[])
        # Resolve once so later consumers do not depend on the working directory.
        self.sourcedir = os.path.abspath(sourcedir)
|
|
34
|
+
|
|
35
|
+
class CMakeBuild(build_ext):
    """``build_ext`` command that configures and builds extensions with CMake."""

    def run(self):
        """Check that ``cmake`` can be launched, then build every extension.

        Only the ability to start ``cmake`` is checked; the reported version
        is not compared against the minimum named in the error message.

        Raises:
            RuntimeError: if starting ``cmake`` fails with ``OSError``.
        """
        try:
            subprocess.check_output(['cmake', '--version'])
        except OSError as err:
            # Chain explicitly so the underlying launch failure stays visible.
            raise RuntimeError(
                "CMake >= 3.12 must be installed to build the following extensions: " +
                ", ".join(e.name for e in self.extensions)) from err

        for ext in self.extensions:
            self.build_extension(ext)

    def build_extension(self, ext):
        """Run the CMake configure step and build the ``python`` target.

        Args:
            ext: extension object providing ``name`` and ``sourcedir``.

        Raises:
            subprocess.CalledProcessError: if either ``cmake`` invocation
                exits with a non-zero status.
        """
        # Directory where setuptools expects the built extension to appear.
        extdir = os.path.abspath(
            os.path.dirname(self.get_ext_fullpath(ext.name)))
        cmake_args = ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=' + extdir]
        cmake_args += ['-DWITH_PYTHON=True']
        cfg = 'Debug' if self.debug else 'Release'
        build_args = ['--config', cfg]

        if platform.system() == "Windows":
            # Also set the per-configuration output directory variable.
            cmake_args += ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{}={}'.format(
                cfg.upper(),
                extdir)]
            # sys.maxsize exceeds 2**32 only on a 64-bit interpreter.
            if sys.maxsize > 2**32:
                cmake_args += ['-A', 'x64']
            build_args += ['--', '/m']
        else:
            cmake_args += ['-DCMAKE_BUILD_TYPE=' + cfg]
            build_args += ['--', '-j2']

        # Append the package version define to any CXXFLAGS already set.
        # This environment is passed to the configure step only.
        env = os.environ.copy()
        env['CXXFLAGS'] = '{} -DVERSION_INFO=\\"{}\\"'.format(
            env.get('CXXFLAGS', ''),
            self.distribution.get_version())
        # exist_ok avoids the race between an existence check and creation.
        os.makedirs(self.build_temp, exist_ok=True)
        subprocess.check_call(['cmake', ext.sourcedir] + cmake_args,
                              cwd=self.build_temp, env=env)
        subprocess.check_call(['cmake', '--build', '.', '--target', 'python'] + build_args,
                              cwd=self.build_temp)
        print()  # add an empty line to pretty print
|
|
77
|
+
|
|
78
|
+
# Read the long description inside a context manager so the file handle is
# closed deterministically, and decode explicitly rather than relying on the
# locale default (assumes the README is UTF-8 encoded -- TODO confirm).
with open('python/README.md', encoding='utf-8') as readme_file:
    long_description = readme_file.read()

setup(
    name='datasketches',
    version='2.2.0-SNAPSHOT',
    author='Datasketches Developers',
    author_email='dev@datasketches.apache.org',
    description='A wrapper for the C++ Datasketches library',
    license='Apache License 2.0',
    url='http://datasketches.apache.org',
    long_description=long_description,
    packages=find_packages('python'),  # python packages only in this dir
    package_dir={'': 'python'},
    # may need to add all source paths for sdist packages w/o MANIFEST.in
    ext_modules=[CMakeExtension('datasketches')],
    cmdclass={'build_ext': CMakeBuild},
    setup_requires=['setuptools_scm', 'tox-setuptools'],
    zip_safe=False
)
|