datasketches 0.2.4 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/README.md +1 -1
  4. data/ext/datasketches/kll_wrapper.cpp +5 -1
  5. data/lib/datasketches/version.rb +1 -1
  6. data/vendor/datasketches-cpp/CMakeLists.txt +4 -3
  7. data/vendor/datasketches-cpp/common/CMakeLists.txt +4 -0
  8. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +1 -0
  9. data/vendor/datasketches-cpp/common/include/common_defs.hpp +14 -0
  10. data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov.hpp +5 -3
  11. data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov_impl.hpp +13 -16
  12. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view.hpp +121 -0
  13. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +91 -0
  14. data/vendor/datasketches-cpp/common/test/test_type.hpp +2 -0
  15. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +1 -0
  16. data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +1 -0
  17. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +2 -0
  18. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +1 -0
  19. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +2 -0
  20. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +37 -5
  21. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +25 -9
  22. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +2 -1
  23. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +1 -0
  24. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +2 -0
  25. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +1 -0
  26. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +2 -2
  27. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +1 -0
  28. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +2 -0
  29. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +2 -0
  30. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -0
  31. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +1 -0
  32. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +2 -0
  33. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +1 -0
  34. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +59 -0
  35. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +2 -0
  36. data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +1 -0
  37. data/vendor/datasketches-cpp/kll/CMakeLists.txt +0 -4
  38. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -4
  39. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +3 -0
  40. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +96 -42
  41. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +105 -127
  42. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +94 -25
  43. data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +1 -1
  44. data/vendor/datasketches-cpp/pyproject.toml +1 -1
  45. data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
  46. data/vendor/datasketches-cpp/python/README.md +7 -0
  47. data/vendor/datasketches-cpp/python/src/datasketches.cpp +4 -0
  48. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +6 -1
  49. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +48 -13
  50. data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +68 -0
  51. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +240 -0
  52. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +9 -2
  53. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +1 -0
  54. data/vendor/datasketches-cpp/python/tests/kll_test.py +10 -4
  55. data/vendor/datasketches-cpp/python/tests/quantiles_test.py +126 -0
  56. data/vendor/datasketches-cpp/quantiles/CMakeLists.txt +42 -0
  57. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +641 -0
  58. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +1309 -0
  59. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +44 -0
  60. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.3.0.sk +0 -0
  61. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.6.0.sk +0 -0
  62. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.0.sk +0 -0
  63. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.3.sk +0 -0
  64. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.3.0.sk +0 -0
  65. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.6.0.sk +0 -0
  66. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.0.sk +0 -0
  67. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.3.sk +0 -0
  68. data/vendor/datasketches-cpp/quantiles/test/kolmogorov_smirnov_test.cpp +110 -0
  69. data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +129 -0
  70. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +912 -0
  71. data/vendor/datasketches-cpp/req/CMakeLists.txt +0 -2
  72. data/vendor/datasketches-cpp/req/include/req_common.hpp +0 -5
  73. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +3 -2
  74. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +62 -23
  75. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +62 -59
  76. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +5 -0
  77. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +44 -7
  78. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +31 -26
  79. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +41 -6
  80. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +25 -9
  81. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +2 -2
  82. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +1 -0
  83. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +1 -0
  84. data/vendor/datasketches-cpp/setup.py +1 -1
  85. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
  86. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +8 -6
  87. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +1 -0
  88. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +1 -0
  89. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +7 -45
  90. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -0
  91. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +1 -0
  92. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +2 -0
  93. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +2 -0
  94. data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +1 -0
  95. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +29 -1
  96. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +2 -0
  97. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +16 -0
  98. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +1 -0
  99. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -0
  100. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -0
  101. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +1 -0
  102. metadata +25 -9
  103. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +0 -75
  104. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +0 -184
  105. data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +0 -69
  106. data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +0 -60
@@ -0,0 +1,44 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ add_executable(quantiles_test) # declared without sources; they are attached by target_sources() at the end of this file
19
+
20
+ target_link_libraries(quantiles_test quantiles common common_test) # the sketch under test plus the shared common and common_test targets
21
+
22
+ set_target_properties(quantiles_test PROPERTIES # build the test as C++11 and refuse to fall back to an older standard
23
+ CXX_STANDARD 11
24
+ CXX_STANDARD_REQUIRED YES
25
+ )
26
+
27
+ file(TO_CMAKE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" QUANTILES_TEST_BINARY_PATH) # this directory as a CMake-style path (forward slashes)
28
+ string(APPEND QUANTILES_TEST_BINARY_PATH "/") # trailing slash so a file name can be appended directly
29
+ target_compile_definitions(quantiles_test # expose that directory to the test code as a string macro
30
+ PRIVATE
31
+ TEST_BINARY_INPUT_PATH="${QUANTILES_TEST_BINARY_PATH}" # read by quantiles_compatibility_test.cpp to locate the .sk files
32
+ )
33
+
34
+ add_test( # register the executable as a test named quantiles_test
35
+ NAME quantiles_test
36
+ COMMAND quantiles_test
37
+ )
38
+
39
+ target_sources(quantiles_test # no main() source is listed here; presumably it comes from common_test -- confirm
40
+ PRIVATE
41
+ quantiles_sketch_test.cpp
42
+ quantiles_compatibility_test.cpp
43
+ kolmogorov_smirnov_test.cpp
44
+ )
@@ -0,0 +1,110 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #include <catch.hpp>
21
+
22
+ #include <random>
23
+
24
+ #include <quantiles_sketch.hpp>
25
+ #include <kolmogorov_smirnov.hpp>
26
+
27
+ namespace datasketches {
28
+
29
+ TEST_CASE("kolmogorov-smirnov empty", "[quantiles_sketch]") { // compares two sketches that never receive an update
30
+ quantiles_sketch<double> sketch1;
31
+ quantiles_sketch<double> sketch2;
32
+ REQUIRE(kolmogorov_smirnov::delta(sketch1, sketch2) == 0); // delta of two empty sketches is expected to be exactly 0
33
+ REQUIRE_FALSE(kolmogorov_smirnov::test(sketch1, sketch2, 0.01)); // test() must return false; 0.01 is presumably the p-value -- confirm in kolmogorov_smirnov.hpp
34
+ }
35
+
36
+ TEST_CASE("kolmogorov-smirnov same distribution", "[quantiles_sketch]") { // both sketches are fed the identical stream of values
37
+ const uint16_t k = 128;
38
+ quantiles_sketch<double> sketch1(k);
39
+ quantiles_sketch<double> sketch2(k);
40
+ std::default_random_engine rand; // default-constructed: no explicit seed is given
41
+ std::normal_distribution<double> distr; // default parameters: mean 0, standard deviation 1
42
+ const int n = k * 3 - 1; // 383 updates for k = 128
43
+ for (int i = 0; i < n; ++i) {
44
+ const double x = distr(rand);
45
+ sketch1.update(x); // same x goes into both sketches
46
+ sketch2.update(x);
47
+ }
48
+ REQUIRE(kolmogorov_smirnov::delta(sketch1, sketch2) == Approx(0).margin(0.02)); // delta must be within 0.02 of zero
49
+ REQUIRE_FALSE(kolmogorov_smirnov::test(sketch1, sketch2, 0.01)); // test() must return false for identical inputs
50
+ }
51
+
52
+ TEST_CASE("kolmogorov-smirnov very different distributions", "[quantiles_sketch]") { // sketch1 sees every sample shifted by +100
53
+ const uint16_t k = 128;
54
+ quantiles_sketch<double> sketch1(k);
55
+ quantiles_sketch<double> sketch2(k);
56
+ std::default_random_engine rand; // default-constructed: no explicit seed is given
57
+ std::normal_distribution<double> distr; // default parameters: mean 0, standard deviation 1
58
+ const int n = k * 3 - 1; // 383 updates for k = 128
59
+ for (int i = 0; i < n; ++i) {
60
+ const double x = distr(rand);
61
+ sketch1.update(x + 100.0); // shifted copy of the sample
62
+ sketch2.update(x); // unshifted sample
63
+ }
64
+ const auto delta = kolmogorov_smirnov::delta(sketch1, sketch2);
65
+ REQUIRE(delta == Approx(1.0).margin(1e-6)); // delta is expected to be 1 to within 1e-6
66
+ REQUIRE(delta <= 1); // and must not exceed 1 despite the tolerance above
67
+ REQUIRE(kolmogorov_smirnov::test(sketch1, sketch2, 0.05)); // test() must return true for this pair
68
+ }
69
+
70
+ TEST_CASE("kolmogorov-smirnov slightly different distributions", "[quantiles_sketch]") { // +0.05 shift at k = 1024: expected to stay at or below the threshold
71
+ const uint16_t k = 1024;
72
+ quantiles_sketch<double> sketch1(k);
73
+ quantiles_sketch<double> sketch2(k);
74
+ std::default_random_engine rand; // default-constructed: no explicit seed is given
75
+ std::normal_distribution<double> distr; // default parameters: mean 0, standard deviation 1
76
+ const int n = k * 3 - 1; // 3071 updates for k = 1024
77
+ for (int i = 0; i < n; ++i) {
78
+ const double x = distr(rand);
79
+ sketch1.update(x + 0.05); // slightly shifted copy of the sample
80
+ sketch2.update(x);
81
+ }
82
+ const double delta = kolmogorov_smirnov::delta(sketch1, sketch2);
83
+ REQUIRE(delta == Approx(0.02).margin(0.01)); // accepts delta in [0.01, 0.03]
84
+ const double threshold = kolmogorov_smirnov::threshold(sketch1, sketch2, 0.05);
85
+ std::cout << "delta=" << delta << ", threshold=" << threshold << "\n"; // NOTE(review): <iostream> is not among this file's direct includes; presumably it arrives transitively -- consider including it
86
+ REQUIRE_FALSE(delta > threshold); // delta must not exceed the threshold at this k
87
+ REQUIRE_FALSE(kolmogorov_smirnov::test(sketch1, sketch2, 0.05)); // test() must agree with the manual comparison above
88
+ }
89
+
90
+ TEST_CASE("kolmogorov-smirnov slightly different distributions high resolution", "[quantiles_sketch]") { // same +0.05 shift at k = 8192: now expected to exceed the threshold
91
+ const uint16_t k = 8192;
92
+ quantiles_sketch<double> sketch1(k);
93
+ quantiles_sketch<double> sketch2(k);
94
+ std::default_random_engine rand; // default-constructed: no explicit seed is given
95
+ std::normal_distribution<double> distr; // default parameters: mean 0, standard deviation 1
96
+ const int n = k * 3 - 1; // 24575 updates for k = 8192
97
+ for (int i = 0; i < n; ++i) {
98
+ const double x = distr(rand);
99
+ sketch1.update(x + 0.05); // slightly shifted copy of the sample
100
+ sketch2.update(x);
101
+ }
102
+ const double delta = kolmogorov_smirnov::delta(sketch1, sketch2);
103
+ REQUIRE(delta == Approx(0.02).margin(0.01)); // accepts delta in [0.01, 0.03]
104
+ const double threshold = kolmogorov_smirnov::threshold(sketch1, sketch2, 0.05);
105
+ std::cout << "delta=" << delta << ", threshold=" << threshold << "\n"; // NOTE(review): <iostream> is not among this file's direct includes; presumably it arrives transitively -- consider including it
106
+ REQUIRE(delta > threshold); // delta must exceed the threshold at this k
107
+ REQUIRE(kolmogorov_smirnov::test(sketch1, sketch2, 0.05)); // test() must agree with the manual comparison above
108
+ }
109
+
110
+ } /* namespace datasketches */
@@ -0,0 +1,129 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #include <catch.hpp>
21
+ #include <cmath>
22
+ #include <sstream>
23
+ #include <fstream>
24
+
25
+ #include <quantiles_sketch.hpp>
26
+ #include <serde.hpp>
27
+ #include <test_allocator.hpp>
28
+
29
+ namespace datasketches {
30
+
31
+ #ifdef TEST_BINARY_INPUT_PATH // directory holding the .sk input files, when supplied as a compile definition
32
+ static std::string testBinaryInputPath = TEST_BINARY_INPUT_PATH;
33
+ #else // macro not defined: fall back to the relative directory "test/"
34
+ static std::string testBinaryInputPath = "test/";
35
+ #endif // TEST_BINARY_INPUT_PATH
36
+
37
+ // These tests check compatibility with old versions of Java's
38
+ // Quantiles sketch, which supports only doubles.
39
+ //
40
+ // Typical usage would be just quantiles_sketch<double>; test_allocator is used here instead (the test case below inspects test_allocator_total_bytes).
41
+ using quantiles_double_sketch = quantiles_sketch<double, std::less<double>, test_allocator<double>>;
42
+
43
+ static void quantiles_decode_and_check(uint16_t k, uint64_t n, const std::string& version, // loads Qk<k>_n<n>_v<version>.sk twice (stream, then bytes) and checks the median of each
44
+ double expected_median) { // expected_median is compared with exact ==; the callers in this file pass whole numbers
45
+ const double median_rank = 0.5;
46
+
47
+ std::ostringstream filestr;
48
+ filestr << "Qk" << k << "_n" << n << "_v" << version << ".sk"; // file name only; the directory is prepended below
49
+ // as stream
50
+ std::ifstream is;
51
+ is.exceptions(std::ios::failbit | std::ios::badbit); // make the stream throw instead of failing silently, e.g. when open() fails
52
+ std::string filename = testBinaryInputPath + filestr.str(); // NOTE(review): <string> is not among this file's direct includes
53
+
54
+ is.open(filename, std::ios::binary);
55
+ auto sketch_stream = quantiles_double_sketch::deserialize(is, serde<double>(), 0); // the trailing 0 is presumably the allocator argument -- confirm in quantiles_sketch.hpp
56
+ is.close();
57
+ REQUIRE(sketch_stream.get_quantile(median_rank) == expected_median);
58
+
59
+ // as bytes
60
+ std::ifstream infile(filename, std::ios::binary); // NOTE(review): unlike `is`, no exceptions mask is set on this stream
61
+ std::vector<char> bytes( // reads the whole file into memory; NOTE(review): <vector> and <iterator> are not among this file's direct includes
62
+ (std::istreambuf_iterator<char>(infile)),
63
+ (std::istreambuf_iterator<char>()));
64
+ infile.close();
65
+ auto sketch_bytes = quantiles_double_sketch::deserialize(bytes.data(), bytes.size(), serde<double>(), 0); // same check through the (pointer, size) overload
66
+ REQUIRE(sketch_bytes.get_quantile(median_rank) == expected_median);
67
+ }
68
+
69
+ TEST_CASE("quantiles compatibility", "[quantiles_compatibility]") { // one SECTION per input file: k = 128, n in {50, 1000}, four format versions
70
+
71
+ // setup: Catch re-enters this body once per SECTION, so the reset below and the cleanup check at the bottom run for every file
72
+ test_allocator_total_bytes = 0; // presumably the running byte count kept by test_allocator -- confirm in test_allocator.hpp
73
+
74
+ SECTION("Qk128_n50_v0.3.0.sk") {
75
+ // file: Qk128_n50_v0.3.0.sk
76
+ // median: 26.0
77
+ quantiles_decode_and_check(128, 50, "0.3.0", 26.0);
78
+ }
79
+
80
+ SECTION("Qk128_n1000_v0.3.0.sk") {
81
+ // file: Qk128_n1000_v0.3.0.sk
82
+ // median: 501.0
83
+ quantiles_decode_and_check(128, 1000, "0.3.0", 501.0);
84
+ }
85
+
86
+ SECTION("Qk128_n50_v0.6.0.sk") {
87
+ // file: Qk128_n50_v0.6.0.sk
88
+ // median: 26.0
89
+ quantiles_decode_and_check(128, 50, "0.6.0", 26.0);
90
+ }
91
+
92
+ SECTION("Qk128_n1000_v0.6.0.sk") {
93
+ // file: Qk128_n1000_v0.6.0.sk
94
+ // median: 501.0
95
+ quantiles_decode_and_check(128, 1000, "0.6.0", 501.0);
96
+ }
97
+
98
+ SECTION("Qk128_n50_v0.8.0.sk") {
99
+ // file: Qk128_n50_v0.8.0.sk
100
+ // median: 26.0
101
+ quantiles_decode_and_check(128, 50, "0.8.0", 26.0);
102
+ }
103
+
104
+ SECTION("Qk128_n1000_v0.8.0.sk") {
105
+ // file: Qk128_n1000_v0.8.0.sk
106
+ // median: 501.0
107
+ quantiles_decode_and_check(128, 1000, "0.8.0", 501.0);
108
+ }
109
+
110
+ SECTION("Qk128_n50_v0.8.3.sk") {
111
+ // file: Qk128_n50_v0.8.3.sk
112
+ // median: 26.0
113
+ quantiles_decode_and_check(128, 50, "0.8.3", 26.0);
114
+ }
115
+
116
+ SECTION("Qk128_n1000_v0.8.3.sk") {
117
+ // file: Qk128_n1000_v0.8.3.sk
118
+ // median: 501.0
119
+ quantiles_decode_and_check(128, 1000, "0.8.3", 501.0);
120
+ }
121
+
122
+ // cleanup: the REQUIRE is reached only when the counter is non-zero, so a clean run records no assertion here and a non-zero counter fails the test
123
+ if (test_allocator_total_bytes != 0) {
124
+ REQUIRE(test_allocator_total_bytes == 0);
125
+ }
126
+
127
+ }
128
+
129
+ } /* namespace datasketches */