datasketches 0.2.4 → 0.2.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (106) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/README.md +1 -1
  4. data/ext/datasketches/kll_wrapper.cpp +5 -1
  5. data/lib/datasketches/version.rb +1 -1
  6. data/vendor/datasketches-cpp/CMakeLists.txt +4 -3
  7. data/vendor/datasketches-cpp/common/CMakeLists.txt +4 -0
  8. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +1 -0
  9. data/vendor/datasketches-cpp/common/include/common_defs.hpp +14 -0
  10. data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov.hpp +5 -3
  11. data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov_impl.hpp +13 -16
  12. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view.hpp +121 -0
  13. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +91 -0
  14. data/vendor/datasketches-cpp/common/test/test_type.hpp +2 -0
  15. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +1 -0
  16. data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +1 -0
  17. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +2 -0
  18. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +1 -0
  19. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +2 -0
  20. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +37 -5
  21. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +25 -9
  22. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +2 -1
  23. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +1 -0
  24. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +2 -0
  25. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +1 -0
  26. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +2 -2
  27. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +1 -0
  28. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +2 -0
  29. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +2 -0
  30. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -0
  31. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +1 -0
  32. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +2 -0
  33. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +1 -0
  34. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +59 -0
  35. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +2 -0
  36. data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +1 -0
  37. data/vendor/datasketches-cpp/kll/CMakeLists.txt +0 -4
  38. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -4
  39. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +3 -0
  40. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +96 -42
  41. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +105 -127
  42. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +94 -25
  43. data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +1 -1
  44. data/vendor/datasketches-cpp/pyproject.toml +1 -1
  45. data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
  46. data/vendor/datasketches-cpp/python/README.md +7 -0
  47. data/vendor/datasketches-cpp/python/src/datasketches.cpp +4 -0
  48. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +6 -1
  49. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +48 -13
  50. data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +68 -0
  51. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +240 -0
  52. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +9 -2
  53. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +1 -0
  54. data/vendor/datasketches-cpp/python/tests/kll_test.py +10 -4
  55. data/vendor/datasketches-cpp/python/tests/quantiles_test.py +126 -0
  56. data/vendor/datasketches-cpp/quantiles/CMakeLists.txt +42 -0
  57. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +641 -0
  58. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +1309 -0
  59. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +44 -0
  60. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.3.0.sk +0 -0
  61. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.6.0.sk +0 -0
  62. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.0.sk +0 -0
  63. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.3.sk +0 -0
  64. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.3.0.sk +0 -0
  65. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.6.0.sk +0 -0
  66. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.0.sk +0 -0
  67. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.3.sk +0 -0
  68. data/vendor/datasketches-cpp/quantiles/test/kolmogorov_smirnov_test.cpp +110 -0
  69. data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +129 -0
  70. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +912 -0
  71. data/vendor/datasketches-cpp/req/CMakeLists.txt +0 -2
  72. data/vendor/datasketches-cpp/req/include/req_common.hpp +0 -5
  73. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +3 -2
  74. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +62 -23
  75. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +62 -59
  76. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +5 -0
  77. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +44 -7
  78. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +31 -26
  79. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +41 -6
  80. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +25 -9
  81. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +2 -2
  82. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +1 -0
  83. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +1 -0
  84. data/vendor/datasketches-cpp/setup.py +1 -1
  85. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
  86. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +8 -6
  87. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +1 -0
  88. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +1 -0
  89. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +7 -45
  90. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -0
  91. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +1 -0
  92. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +2 -0
  93. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +2 -0
  94. data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +1 -0
  95. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +29 -1
  96. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +2 -0
  97. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +16 -0
  98. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +1 -0
  99. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -0
  100. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -0
  101. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +1 -0
  102. metadata +25 -9
  103. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +0 -75
  104. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +0 -184
  105. data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +0 -69
  106. data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +0 -60
@@ -0,0 +1,44 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ add_executable(quantiles_test) # test target declared without sources; they are attached by target_sources() at the end of this file
19
+
20
+ target_link_libraries(quantiles_test quantiles common common_test) # link against the quantiles, common and common_test targets
21
+
22
+ set_target_properties(quantiles_test PROPERTIES
23
+ CXX_STANDARD 11
24
+ CXX_STANDARD_REQUIRED YES
25
+ )
26
+
27
+ file(TO_CMAKE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" QUANTILES_TEST_BINARY_PATH) # this directory, converted to a cmake-style path
28
+ string(APPEND QUANTILES_TEST_BINARY_PATH "/") # trailing slash: the C++ tests append a bare file name to this path
29
+ target_compile_definitions(quantiles_test
30
+ PRIVATE
31
+ TEST_BINARY_INPUT_PATH="${QUANTILES_TEST_BINARY_PATH}"
32
+ )
33
+
34
+ add_test(
35
+ NAME quantiles_test
36
+ COMMAND quantiles_test
37
+ )
38
+
39
+ target_sources(quantiles_test
40
+ PRIVATE
41
+ quantiles_sketch_test.cpp
42
+ quantiles_compatibility_test.cpp
43
+ kolmogorov_smirnov_test.cpp
44
+ )
@@ -0,0 +1,110 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #include <catch.hpp>
21
+
22
+ #include <random>
23
+
24
+ #include <quantiles_sketch.hpp>
25
+ #include <kolmogorov_smirnov.hpp>
26
+
27
+ namespace datasketches {
28
+
29
+ TEST_CASE("kolmogorov-smirnov empty", "[quantiles_sketch]") { // compares two default-constructed sketches that never receive an update
30
+ quantiles_sketch<double> sketch1;
31
+ quantiles_sketch<double> sketch2;
32
+ REQUIRE(kolmogorov_smirnov::delta(sketch1, sketch2) == 0); // exact comparison: delta must be 0 for two empty sketches
33
+ REQUIRE_FALSE(kolmogorov_smirnov::test(sketch1, sketch2, 0.01)); // test() must return false; 0.01 is presumably the p-value -- confirm in kolmogorov_smirnov.hpp
34
+ }
35
+
36
+ TEST_CASE("kolmogorov-smirnov same distribution", "[quantiles_sketch]") { // feeds the identical value stream into two sketches and expects them to be judged the same
37
+ const uint16_t k = 128;
38
+ quantiles_sketch<double> sketch1(k);
39
+ quantiles_sketch<double> sketch2(k);
40
+ std::default_random_engine rand; // default-constructed engine, no explicit seed
41
+ std::normal_distribution<double> distr; // default-constructed normal distribution
42
+ const int n = k * 3 - 1; // 383 updates for k = 128
43
+ for (int i = 0; i < n; ++i) {
44
+ const double x = distr(rand);
45
+ sketch1.update(x); // both sketches receive the same x
46
+ sketch2.update(x);
47
+ }
48
+ REQUIRE(kolmogorov_smirnov::delta(sketch1, sketch2) == Approx(0).margin(0.02)); // delta must be within 0.02 of zero
49
+ REQUIRE_FALSE(kolmogorov_smirnov::test(sketch1, sketch2, 0.01)); // test() must return false for identical inputs
50
+ }
51
+
52
+ TEST_CASE("kolmogorov-smirnov very different distributions", "[quantiles_sketch]") { // shifts one input stream by 100.0 and expects the sketches to be judged different
53
+ const uint16_t k = 128;
54
+ quantiles_sketch<double> sketch1(k);
55
+ quantiles_sketch<double> sketch2(k);
56
+ std::default_random_engine rand; // default-constructed engine, no explicit seed
57
+ std::normal_distribution<double> distr;
58
+ const int n = k * 3 - 1; // 383 updates for k = 128
59
+ for (int i = 0; i < n; ++i) {
60
+ const double x = distr(rand);
61
+ sketch1.update(x + 100.0); // sketch1 sees every sample offset by 100.0
62
+ sketch2.update(x);
63
+ }
64
+ const auto delta = kolmogorov_smirnov::delta(sketch1, sketch2);
65
+ REQUIRE(delta == Approx(1.0).margin(1e-6)); // delta expected to be 1.0 within 1e-6
66
+ REQUIRE(delta <= 1); // and never above 1
67
+ REQUIRE(kolmogorov_smirnov::test(sketch1, sketch2, 0.05)); // test() must return true; 0.05 is presumably the p-value -- confirm in kolmogorov_smirnov.hpp
68
+ }
69
+
70
+ TEST_CASE("kolmogorov-smirnov slightly different distributions", "[quantiles_sketch]") { // shifts one stream by 0.05 with k = 1024 and expects delta to stay at or below the threshold
71
+ const uint16_t k = 1024;
72
+ quantiles_sketch<double> sketch1(k);
73
+ quantiles_sketch<double> sketch2(k);
74
+ std::default_random_engine rand; // default-constructed engine, no explicit seed
75
+ std::normal_distribution<double> distr;
76
+ const int n = k * 3 - 1; // 3071 updates for k = 1024
77
+ for (int i = 0; i < n; ++i) {
78
+ const double x = distr(rand);
79
+ sketch1.update(x + 0.05); // sketch1 sees every sample offset by 0.05
80
+ sketch2.update(x);
81
+ }
82
+ const double delta = kolmogorov_smirnov::delta(sketch1, sketch2);
83
+ REQUIRE(delta == Approx(0.02).margin(0.01)); // delta expected in [0.01, 0.03]
84
+ const double threshold = kolmogorov_smirnov::threshold(sketch1, sketch2, 0.05);
85
+ std::cout << "delta=" << delta << ", threshold=" << threshold << "\n"; // NOTE(review): <iostream> is not among this file's visible includes; std::cout presumably arrives via another header -- confirm
86
+ REQUIRE_FALSE(delta > threshold); // at this k the shift must not exceed the threshold
87
+ REQUIRE_FALSE(kolmogorov_smirnov::test(sketch1, sketch2, 0.05)); // so test() must return false
88
+ }
89
+
90
+ TEST_CASE("kolmogorov-smirnov slightly different distributions high resolution", "[quantiles_sketch]") { // same 0.05 shift as the k = 1024 case, but with k = 8192 the difference must exceed the threshold
91
+ const uint16_t k = 8192;
92
+ quantiles_sketch<double> sketch1(k);
93
+ quantiles_sketch<double> sketch2(k);
94
+ std::default_random_engine rand; // default-constructed engine, no explicit seed
95
+ std::normal_distribution<double> distr;
96
+ const int n = k * 3 - 1; // 24575 updates for k = 8192
97
+ for (int i = 0; i < n; ++i) {
98
+ const double x = distr(rand);
99
+ sketch1.update(x + 0.05); // sketch1 sees every sample offset by 0.05
100
+ sketch2.update(x);
101
+ }
102
+ const double delta = kolmogorov_smirnov::delta(sketch1, sketch2);
103
+ REQUIRE(delta == Approx(0.02).margin(0.01)); // delta expected in [0.01, 0.03]
104
+ const double threshold = kolmogorov_smirnov::threshold(sketch1, sketch2, 0.05);
105
+ std::cout << "delta=" << delta << ", threshold=" << threshold << "\n"; // NOTE(review): <iostream> is not among this file's visible includes; std::cout presumably arrives via another header -- confirm
106
+ REQUIRE(delta > threshold); // unlike the k = 1024 case, delta must exceed the threshold here
107
+ REQUIRE(kolmogorov_smirnov::test(sketch1, sketch2, 0.05)); // so test() must return true
108
+ }
109
+
110
+ } /* namespace datasketches */
@@ -0,0 +1,129 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #include <catch.hpp>
21
+ #include <cmath>
22
+ #include <sstream>
23
+ #include <fstream>
24
+
25
+ #include <quantiles_sketch.hpp>
26
+ #include <serde.hpp>
27
+ #include <test_allocator.hpp>
28
+
29
+ namespace datasketches {
30
+
31
+ #ifdef TEST_BINARY_INPUT_PATH // macro supplied by the build (see target_compile_definitions in the test CMakeLists)
32
+ static std::string testBinaryInputPath = TEST_BINARY_INPUT_PATH; // directory prefix for the serialized .sk input files
33
+ #else
34
+ static std::string testBinaryInputPath = "test/"; // fallback used when the build does not define the macro
35
+ #endif
36
+
37
+ // these tests are for compatibility with old versions of Java's
38
+ // Quantiles sketch, which is only for doubles.
39
+ //
40
+ // typical usage would be just quantiles_sketch<double>, but here we use test_allocator
41
+ using quantiles_double_sketch = quantiles_sketch<double, std::less<double>, test_allocator<double>>; // double items, std::less ordering, test_allocator so the test case can check test_allocator_total_bytes
42
+
43
+ static void quantiles_decode_and_check(uint16_t k, uint64_t n, const std::string& version, // loads "Qk<k>_n<n>_v<version>.sk" from testBinaryInputPath, deserializes it from a stream and from a byte buffer,
44
+ double expected_median) { // and requires get_quantile(0.5) of each result to equal expected_median exactly
45
+ const double median_rank = 0.5;
46
+
47
+ std::ostringstream filestr;
48
+ filestr << "Qk" << k << "_n" << n << "_v" << version << ".sk"; // file name encodes k, n and the producing version
49
+ // as stream
50
+ std::ifstream is;
51
+ is.exceptions(std::ios::failbit | std::ios::badbit); // make a failed open or read throw instead of failing silently
52
+ std::string filename = testBinaryInputPath + filestr.str();
53
+
54
+ is.open(filename, std::ios::binary);
55
+ auto sketch_stream = quantiles_double_sketch::deserialize(is, serde<double>(), 0); // NOTE(review): trailing 0 is presumably the allocator argument -- confirm against deserialize() and test_allocator
56
+ is.close();
57
+ REQUIRE(sketch_stream.get_quantile(median_rank) == expected_median); // exact floating-point equality is intended here
58
+
59
+ // as bytes
60
+ std::ifstream infile(filename, std::ios::binary); // NOTE(review): no exception mask on this stream; a failed open would leave bytes empty (the same file was opened above with exceptions enabled)
61
+ std::vector<char> bytes( // NOTE(review): <vector>, <string> and <iterator> are not among this file's visible includes -- presumably transitive; confirm
62
+ (std::istreambuf_iterator<char>(infile)), // extra parentheses keep this from being parsed as a function declaration
63
+ (std::istreambuf_iterator<char>()));
64
+ infile.close();
65
+ auto sketch_bytes = quantiles_double_sketch::deserialize(bytes.data(), bytes.size(), serde<double>(), 0); // same check through the (pointer, size) overload
66
+ REQUIRE(sketch_bytes.get_quantile(median_rank) == expected_median);
67
+ }
68
+
69
+ TEST_CASE("quantiles compatibility", "[quantiles_compatibility]") { // one SECTION per stored .sk file; Catch re-runs the test case body for each SECTION, so setup and cleanup apply to every file
70
+
71
+ // setup
72
+ test_allocator_total_bytes = 0; // reset the counter that the cleanup check below reads
73
+
74
+ SECTION("Qk128_n50_v0.3.0.sk") {
75
+ // file: Qk128_n50_v0.3.0.sk
76
+ // median: 26.0
77
+ quantiles_decode_and_check(128, 50, "0.3.0", 26.0);
78
+ }
79
+
80
+ SECTION("Qk128_n1000_v0.3.0.sk") {
81
+ // file: Qk128_n1000_v0.3.0.sk
82
+ // median: 501.0
83
+ quantiles_decode_and_check(128, 1000, "0.3.0", 501.0);
84
+ }
85
+
86
+ SECTION("Qk128_n50_v0.6.0.sk") {
87
+ // file: Qk128_n50_v0.6.0.sk
88
+ // median: 26.0
89
+ quantiles_decode_and_check(128, 50, "0.6.0", 26.0);
90
+ }
91
+
92
+ SECTION("Qk128_n1000_v0.6.0.sk") {
93
+ // file: Qk128_n1000_v0.6.0.sk
94
+ // median: 501.0
95
+ quantiles_decode_and_check(128, 1000, "0.6.0", 501.0);
96
+ }
97
+
98
+ SECTION("Qk128_n50_v0.8.0.sk") {
99
+ // file: Qk128_n50_v0.8.0.sk
100
+ // median: 26.0
101
+ quantiles_decode_and_check(128, 50, "0.8.0", 26.0);
102
+ }
103
+
104
+ SECTION("Qk128_n1000_v0.8.0.sk") {
105
+ // file: Qk128_n1000_v0.8.0.sk
106
+ // median: 501.0
107
+ quantiles_decode_and_check(128, 1000, "0.8.0", 501.0);
108
+ }
109
+
110
+ SECTION("Qk128_n50_v0.8.3.sk") {
111
+ // file: Qk128_n50_v0.8.3.sk
112
+ // median: 26.0
113
+ quantiles_decode_and_check(128, 50, "0.8.3", 26.0);
114
+ }
115
+
116
+ SECTION("Qk128_n1000_v0.8.3.sk") {
117
+ // file: Qk128_n1000_v0.8.3.sk
118
+ // median: 501.0
119
+ quantiles_decode_and_check(128, 1000, "0.8.3", 501.0);
120
+ }
121
+
122
+ // cleanup
123
+ if (test_allocator_total_bytes != 0) { // the REQUIRE is only reached when the counter is non-zero, so it fails whenever it executes
124
+ REQUIRE(test_allocator_total_bytes == 0); // a non-zero counter here presumably means memory from test_allocator is still outstanding -- confirm in test_allocator.hpp
125
+ }
126
+
127
+ }
128
+
129
+ } /* namespace datasketches */