datasketches 0.3.0 → 0.3.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (114) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +8 -0
  3. data/ext/datasketches/cpc_wrapper.cpp +1 -1
  4. data/lib/datasketches/version.rb +1 -1
  5. data/vendor/datasketches-cpp/CMakeLists.txt +22 -20
  6. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +25 -27
  7. data/vendor/datasketches-cpp/common/include/common_defs.hpp +8 -6
  8. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +11 -0
  9. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +5 -4
  10. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +1 -1
  11. data/vendor/datasketches-cpp/common/test/integration_test.cpp +6 -0
  12. data/vendor/datasketches-cpp/count/CMakeLists.txt +42 -0
  13. data/vendor/datasketches-cpp/count/include/count_min.hpp +351 -0
  14. data/vendor/datasketches-cpp/count/include/count_min_impl.hpp +517 -0
  15. data/vendor/datasketches-cpp/count/test/CMakeLists.txt +43 -0
  16. data/vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp +155 -0
  17. data/vendor/datasketches-cpp/count/test/count_min_test.cpp +306 -0
  18. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +1 -1
  19. data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +3 -3
  20. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +1 -1
  21. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +16 -8
  22. data/vendor/datasketches-cpp/density/CMakeLists.txt +42 -0
  23. data/vendor/datasketches-cpp/density/include/density_sketch.hpp +236 -0
  24. data/vendor/datasketches-cpp/density/include/density_sketch_impl.hpp +543 -0
  25. data/vendor/datasketches-cpp/density/test/CMakeLists.txt +35 -0
  26. data/vendor/datasketches-cpp/density/test/density_sketch_test.cpp +244 -0
  27. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +9 -3
  28. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +19 -11
  29. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +2 -5
  30. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +19 -7
  31. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +1 -1
  32. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +98 -42
  33. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +2 -0
  34. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +92 -59
  35. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +16 -6
  36. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +3 -21
  37. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +8 -0
  38. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +14 -6
  39. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +1 -1
  40. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +8 -2
  41. data/vendor/datasketches-cpp/hll/include/hll.hpp +9 -8
  42. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +7 -1
  43. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -1
  44. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +8 -3
  45. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +2 -2
  46. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +2 -2
  47. data/vendor/datasketches-cpp/python/CMakeLists.txt +6 -0
  48. data/vendor/datasketches-cpp/python/README.md +5 -5
  49. data/vendor/datasketches-cpp/python/datasketches/DensityWrapper.py +87 -0
  50. data/vendor/datasketches-cpp/python/datasketches/KernelFunction.py +35 -0
  51. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +15 -9
  52. data/vendor/datasketches-cpp/python/datasketches/TuplePolicy.py +77 -0
  53. data/vendor/datasketches-cpp/python/datasketches/TupleWrapper.py +205 -0
  54. data/vendor/datasketches-cpp/python/datasketches/__init__.py +17 -1
  55. data/vendor/datasketches-cpp/python/include/kernel_function.hpp +98 -0
  56. data/vendor/datasketches-cpp/python/include/py_object_lt.hpp +37 -0
  57. data/vendor/datasketches-cpp/python/include/py_object_ostream.hpp +48 -0
  58. data/vendor/datasketches-cpp/python/include/quantile_conditional.hpp +104 -0
  59. data/vendor/datasketches-cpp/python/include/tuple_policy.hpp +136 -0
  60. data/vendor/datasketches-cpp/python/src/count_wrapper.cpp +101 -0
  61. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +16 -30
  62. data/vendor/datasketches-cpp/python/src/datasketches.cpp +6 -0
  63. data/vendor/datasketches-cpp/python/src/density_wrapper.cpp +95 -0
  64. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +127 -73
  65. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +28 -36
  66. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +108 -160
  67. data/vendor/datasketches-cpp/python/src/py_serde.cpp +5 -4
  68. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +99 -148
  69. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +117 -178
  70. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +67 -73
  71. data/vendor/datasketches-cpp/python/src/tuple_wrapper.cpp +215 -0
  72. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +2 -2
  73. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +1 -1
  74. data/vendor/datasketches-cpp/python/tests/count_min_test.py +86 -0
  75. data/vendor/datasketches-cpp/python/tests/cpc_test.py +10 -10
  76. data/vendor/datasketches-cpp/python/tests/density_test.py +93 -0
  77. data/vendor/datasketches-cpp/python/tests/fi_test.py +41 -2
  78. data/vendor/datasketches-cpp/python/tests/hll_test.py +19 -20
  79. data/vendor/datasketches-cpp/python/tests/kll_test.py +40 -6
  80. data/vendor/datasketches-cpp/python/tests/quantiles_test.py +39 -5
  81. data/vendor/datasketches-cpp/python/tests/req_test.py +38 -5
  82. data/vendor/datasketches-cpp/python/tests/theta_test.py +16 -14
  83. data/vendor/datasketches-cpp/python/tests/tuple_test.py +206 -0
  84. data/vendor/datasketches-cpp/python/tests/vo_test.py +7 -0
  85. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +8 -3
  86. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +4 -4
  87. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +1 -1
  88. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +0 -2
  89. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +8 -3
  90. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +2 -2
  91. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +20 -6
  92. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +30 -16
  93. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +5 -1
  94. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +19 -15
  95. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +33 -14
  96. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -2
  97. data/vendor/datasketches-cpp/setup.py +1 -1
  98. data/vendor/datasketches-cpp/theta/CMakeLists.txt +1 -0
  99. data/vendor/datasketches-cpp/theta/include/bit_packing.hpp +6279 -0
  100. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +14 -8
  101. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +60 -46
  102. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +4 -2
  103. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +58 -10
  104. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +430 -130
  105. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -9
  106. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +16 -4
  107. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +2 -2
  108. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  109. data/vendor/datasketches-cpp/theta/test/bit_packing_test.cpp +80 -0
  110. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +42 -3
  111. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +25 -0
  112. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +2 -1
  113. data/vendor/datasketches-cpp/version.cfg.in +1 -1
  114. metadata +31 -3
@@ -17,188 +17,139 @@
17
17
  * under the License.
18
18
  */
19
19
 
20
+ #include "py_object_lt.hpp"
21
+ #include "py_object_ostream.hpp"
22
+ #include "quantile_conditional.hpp"
20
23
  #include "quantiles_sketch.hpp"
21
24
 
22
25
  #include <pybind11/pybind11.h>
23
26
  #include <pybind11/stl.h>
24
27
  #include <pybind11/numpy.h>
25
28
  #include <vector>
29
+ #include <stdexcept>
26
30
 
27
31
  namespace py = pybind11;
28
32
 
29
- namespace datasketches {
30
-
31
- namespace python {
32
-
33
- template<typename T>
34
- quantiles_sketch<T> quantiles_sketch_deserialize(py::bytes sk_bytes) {
35
- std::string sk_str = sk_bytes; // implicit cast
36
- return quantiles_sketch<T>::deserialize(sk_str.c_str(), sk_str.length());
37
- }
38
-
39
- template<typename T>
40
- py::object quantiles_sketch_serialize(const quantiles_sketch<T>& sk) {
41
- auto ser_result = sk.serialize();
42
- return py::bytes((char*)ser_result.data(), ser_result.size());
43
- }
44
-
45
- // maybe possible to disambiguate the static vs method rank error calls, but
46
- // this is easier for now
47
- template<typename T>
48
- double quantiles_sketch_generic_normalized_rank_error(uint16_t k, bool pmf) {
49
- return quantiles_sketch<T>::get_normalized_rank_error(k, pmf);
50
- }
51
-
52
- template<typename T>
53
- py::list quantiles_sketch_get_quantiles(const quantiles_sketch<T>& sk,
54
- std::vector<double>& ranks,
55
- bool inclusive) {
56
- size_t n_quantiles = ranks.size();
57
- auto result = sk.get_quantiles(ranks.data(), static_cast<uint32_t>(n_quantiles), inclusive);
58
- // returning as std::vector<> would copy values to a list anyway
59
- py::list list(n_quantiles);
60
- for (size_t i = 0; i < n_quantiles; ++i) {
61
- list[i] = result[i];
62
- }
63
- return list;
64
- }
65
-
66
- template<typename T>
67
- py::list quantiles_sketch_get_pmf(const quantiles_sketch<T>& sk,
68
- std::vector<T>& split_points,
69
- bool inclusive) {
70
- size_t n_points = split_points.size();
71
- auto result = sk.get_PMF(split_points.data(), n_points, inclusive);
72
- py::list list(n_points + 1);
73
- for (size_t i = 0; i <= n_points; ++i) {
74
- list[i] = result[i];
75
- }
76
- return list;
77
- }
78
-
79
- template<typename T>
80
- py::list quantiles_sketch_get_cdf(const quantiles_sketch<T>& sk,
81
- std::vector<T>& split_points,
82
- bool inclusive) {
83
- size_t n_points = split_points.size();
84
- auto result = sk.get_CDF(split_points.data(), n_points, inclusive);
85
- py::list list(n_points + 1);
86
- for (size_t i = 0; i <= n_points; ++i) {
87
- list[i] = result[i];
88
- }
89
- return list;
90
- }
91
-
92
- template<typename T>
93
- void quantiles_sketch_update(quantiles_sketch<T>& sk, py::array_t<T, py::array::c_style | py::array::forcecast> items) {
94
- if (items.ndim() != 1) {
95
- throw std::invalid_argument("input data must have only one dimension. Found: "
96
- + std::to_string(items.ndim()));
97
- }
98
-
99
- auto data = items.template unchecked<1>();
100
- for (uint32_t i = 0; i < data.size(); ++i) {
101
- sk.update(data(i));
102
- }
103
- }
104
-
105
- }
106
- }
107
-
108
- namespace dspy = datasketches::python;
109
-
110
- template<typename T>
33
+ template<typename T, typename C>
111
34
  void bind_quantiles_sketch(py::module &m, const char* name) {
112
35
  using namespace datasketches;
113
36
 
114
- py::class_<quantiles_sketch<T>>(m, name)
37
+ auto quantiles_class = py::class_<quantiles_sketch<T, C>>(m, name)
115
38
  .def(py::init<uint16_t>(), py::arg("k")=quantiles_constants::DEFAULT_K)
116
- .def(py::init<const quantiles_sketch<T>&>())
117
- .def("update", (void (quantiles_sketch<T>::*)(const T&)) &quantiles_sketch<T>::update, py::arg("item"),
118
- "Updates the sketch with the given value")
119
- .def("update", &dspy::quantiles_sketch_update<T>, py::arg("array"),
120
- "Updates the sketch with the values in the given array")
121
- .def("merge", (void (quantiles_sketch<T>::*)(const quantiles_sketch<T>&)) &quantiles_sketch<T>::merge, py::arg("sketch"),
122
- "Merges the provided sketch into the this one")
123
- .def("__str__", &quantiles_sketch<T>::to_string, py::arg("print_levels")=false, py::arg("print_items")=false,
39
+ .def(py::init<const quantiles_sketch<T, C>&>())
40
+ .def(
41
+ "update",
42
+ static_cast<void (quantiles_sketch<T, C>::*)(const T&)>(&quantiles_sketch<T, C>::update),
43
+ py::arg("item"),
44
+ "Updates the sketch with the given value"
45
+ )
46
+ .def("merge", (void (quantiles_sketch<T, C>::*)(const quantiles_sketch<T, C>&)) &quantiles_sketch<T, C>::merge, py::arg("sketch"),
47
+ "Merges the provided sketch into this one")
48
+ .def("__str__", &quantiles_sketch<T, C>::to_string, py::arg("print_levels")=false, py::arg("print_items")=false,
124
49
  "Produces a string summary of the sketch")
125
- .def("to_string", &quantiles_sketch<T>::to_string, py::arg("print_levels")=false, py::arg("print_items")=false,
50
+ .def("to_string", &quantiles_sketch<T, C>::to_string, py::arg("print_levels")=false, py::arg("print_items")=false,
126
51
  "Produces a string summary of the sketch")
127
- .def("is_empty", &quantiles_sketch<T>::is_empty,
52
+ .def("is_empty", &quantiles_sketch<T, C>::is_empty,
128
53
  "Returns True if the sketch is empty, otherwise False")
129
- .def("get_k", &quantiles_sketch<T>::get_k,
54
+ .def("get_k", &quantiles_sketch<T, C>::get_k,
130
55
  "Returns the configured parameter k")
131
- .def("get_n", &quantiles_sketch<T>::get_n,
56
+ .def("get_n", &quantiles_sketch<T, C>::get_n,
132
57
  "Returns the length of the input stream")
133
- .def("get_num_retained", &quantiles_sketch<T>::get_num_retained,
58
+ .def("get_num_retained", &quantiles_sketch<T, C>::get_num_retained,
134
59
  "Returns the number of retained items (samples) in the sketch")
135
- .def("is_estimation_mode", &quantiles_sketch<T>::is_estimation_mode,
60
+ .def("is_estimation_mode", &quantiles_sketch<T, C>::is_estimation_mode,
136
61
  "Returns True if the sketch is in estimation mode, otherwise False")
137
- .def("get_min_value", &quantiles_sketch<T>::get_min_item,
62
+ .def("get_min_value", &quantiles_sketch<T, C>::get_min_item,
138
63
  "Returns the minimum value from the stream. If empty, quantiles_floats_sketch returns nan; quantiles_ints_sketch throws a RuntimeError")
139
- .def("get_max_value", &quantiles_sketch<T>::get_max_item,
64
+ .def("get_max_value", &quantiles_sketch<T, C>::get_max_item,
140
65
  "Returns the maximum value from the stream. If empty, quantiles_floats_sketch returns nan; quantiles_ints_sketch throws a RuntimeError")
141
- .def("get_quantile", &quantiles_sketch<T>::get_quantile, py::arg("rank"), py::arg("inclusive")=false,
66
+ .def("get_quantile", &quantiles_sketch<T, C>::get_quantile, py::arg("rank"), py::arg("inclusive")=false,
142
67
  "Returns an approximation to the data value "
143
68
  "associated with the given rank in a hypothetical sorted "
144
69
  "version of the input stream so far.\n"
145
70
  "For quantiles_floats_sketch: if the sketch is empty this returns nan. "
146
71
  "For quantiles_ints_sketch: if the sketch is empty this throws a RuntimeError.")
147
- .def("get_quantiles", &dspy::quantiles_sketch_get_quantiles<T>, py::arg("ranks"), py::arg("inclusive")=false,
148
- "This returns an array that could have been generated by using get_quantile() for each "
149
- "normalized rank separately.\n"
150
- "If the sketch is empty this returns an empty vector.\n"
151
- "Deprecated. Will be removed in the next major version. Use get_quantile() instead.")
152
- .def("get_rank", &quantiles_sketch<T>::get_rank, py::arg("value"), py::arg("inclusive")=false,
72
+ .def(
73
+ "get_quantiles",
74
+ [](const quantiles_sketch<T, C>& sk, const std::vector<double>& ranks, bool inclusive) {
75
+ return sk.get_quantiles(ranks.data(), ranks.size(), inclusive);
76
+ },
77
+ py::arg("ranks"), py::arg("inclusive")=false,
78
+ "This returns an array that could have been generated by using get_quantile() for each "
79
+ "normalized rank separately.\n"
80
+ "If the sketch is empty this returns an empty vector.\n"
81
+ "Deprecated. Will be removed in the next major version. Use get_quantile() instead."
82
+ )
83
+ .def("get_rank", &quantiles_sketch<T, C>::get_rank, py::arg("value"), py::arg("inclusive")=false,
153
84
  "Returns an approximation to the normalized rank of the given value from 0 to 1, inclusive.\n"
154
85
  "The resulting approximation has a probabilistic guarantee that can be obtained from the "
155
86
  "get_normalized_rank_error(False) function.\n"
156
87
  "With the parameter inclusive=true the weight of the given value is included into the rank."
157
88
  "Otherwise the rank equals the sum of the weights of values less than the given value.\n"
158
89
  "If the sketch is empty this returns nan.")
159
- .def("get_pmf", &dspy::quantiles_sketch_get_pmf<T>, py::arg("split_points"), py::arg("inclusive")=false,
160
- "Returns an approximation to the Probability Mass Function (PMF) of the input stream "
161
- "given a set of split points (values).\n"
162
- "The resulting approximations have a probabilistic guarantee that can be obtained from the "
163
- "get_normalized_rank_error(True) function.\n"
164
- "If the sketch is empty this returns an empty vector.\n"
165
- "split_points is an array of m unique, monotonically increasing float values "
166
- "that divide the real number line into m+1 consecutive disjoint intervals.\n"
167
- "The definition of an 'interval' is inclusive of the left split point (or minimum value) and "
168
- "exclusive of the right split point, with the exception that the last interval will include "
169
- "the maximum value.\n"
170
- "It is not necessary to include either the min or max values in these split points.")
171
- .def("get_cdf", &dspy::quantiles_sketch_get_cdf<T>, py::arg("split_points"), py::arg("inclusive")=false,
172
- "Returns an approximation to the Cumulative Distribution Function (CDF), which is the "
173
- "cumulative analog of the PMF, of the input stream given a set of split points (values).\n"
174
- "The resulting approximations have a probabilistic guarantee that can be obtained from the "
175
- "get_normalized_rank_error(True) function.\n"
176
- "If the sketch is empty this returns an empty vector.\n"
177
- "split_points is an array of m unique, monotonically increasing float values "
178
- "that divide the real number line into m+1 consecutive disjoint intervals.\n"
179
- "The definition of an 'interval' is inclusive of the left split point (or minimum value) and "
180
- "exclusive of the right split point, with the exception that the last interval will include "
181
- "the maximum value.\n"
182
- "It is not necessary to include either the min or max values in these split points.")
183
- .def("normalized_rank_error", (double (quantiles_sketch<T>::*)(bool) const) &quantiles_sketch<T>::get_normalized_rank_error,
184
- py::arg("as_pmf"),
185
- "Gets the normalized rank error for this sketch.\n"
186
- "If pmf is True, returns the 'double-sided' normalized rank error for the get_PMF() function.\n"
187
- "Otherwise, it is the 'single-sided' normalized rank error for all the other queries.\n"
188
- "Constants were derived as the best fit to 99 percentile empirically measured max error in thousands of trials")
189
- .def_static("get_normalized_rank_error", &dspy::quantiles_sketch_generic_normalized_rank_error<T>,
190
- py::arg("k"), py::arg("as_pmf"),
191
- "Gets the normalized rank error given parameters k and the pmf flag.\n"
192
- "If pmf is True, returns the 'double-sided' normalized rank error for the get_PMF() function.\n"
193
- "Otherwise, it is the 'single-sided' normalized rank error for all the other queries.\n"
194
- "Constants were derived as the best fit to 99 percentile empirically measured max error in thousands of trials")
195
- .def("serialize", &dspy::quantiles_sketch_serialize<T>, "Serializes the sketch into a bytes object")
196
- .def_static("deserialize", &dspy::quantiles_sketch_deserialize<T>, "Deserializes the sketch from a bytes object")
197
- ;
90
+ .def(
91
+ "get_pmf",
92
+ [](const quantiles_sketch<T, C>& sk, const std::vector<T>& split_points, bool inclusive) {
93
+ return sk.get_PMF(split_points.data(), split_points.size(), inclusive);
94
+ },
95
+ py::arg("split_points"), py::arg("inclusive")=false,
96
+ "Returns an approximation to the Probability Mass Function (PMF) of the input stream "
97
+ "given a set of split points (values).\n"
98
+ "The resulting approximations have a probabilistic guarantee that can be obtained from the "
99
+ "get_normalized_rank_error(True) function.\n"
100
+ "If the sketch is empty this returns an empty vector.\n"
101
+ "split_points is an array of m unique, monotonically increasing float values "
102
+ "that divide the real number line into m+1 consecutive disjoint intervals.\n"
103
+ "The definition of an 'interval' is inclusive of the left split point (or minimum value) and "
104
+ "exclusive of the right split point, with the exception that the last interval will include "
105
+ "the maximum value.\n"
106
+ "It is not necessary to include either the min or max values in these split points."
107
+ )
108
+ .def(
109
+ "get_cdf",
110
+ [](const quantiles_sketch<T, C>& sk, const std::vector<T>& split_points, bool inclusive) {
111
+ return sk.get_CDF(split_points.data(), split_points.size(), inclusive);
112
+ },
113
+ py::arg("split_points"), py::arg("inclusive")=false,
114
+ "Returns an approximation to the Cumulative Distribution Function (CDF), which is the "
115
+ "cumulative analog of the PMF, of the input stream given a set of split points (values).\n"
116
+ "The resulting approximations have a probabilistic guarantee that can be obtained from the "
117
+ "get_normalized_rank_error(True) function.\n"
118
+ "If the sketch is empty this returns an empty vector.\n"
119
+ "split_points is an array of m unique, monotonically increasing float values "
120
+ "that divide the real number line into m+1 consecutive disjoint intervals.\n"
121
+ "The definition of an 'interval' is inclusive of the left split point (or minimum value) and "
122
+ "exclusive of the right split point, with the exception that the last interval will include "
123
+ "the maximum value.\n"
124
+ "It is not necessary to include either the min or max values in these split points."
125
+ )
126
+ .def(
127
+ "normalized_rank_error",
128
+ static_cast<double (quantiles_sketch<T, C>::*)(bool) const>(&quantiles_sketch<T, C>::get_normalized_rank_error),
129
+ py::arg("as_pmf"),
130
+ "Gets the normalized rank error for this sketch.\n"
131
+ "If pmf is True, returns the 'double-sided' normalized rank error for the get_PMF() function.\n"
132
+ "Otherwise, it is the 'single-sided' normalized rank error for all the other queries.\n"
133
+ "Constants were derived as the best fit to 99 percentile empirically measured max error in thousands of trials"
134
+ )
135
+ .def_static(
136
+ "get_normalized_rank_error",
137
+ [](uint16_t k, bool pmf) { return quantiles_sketch<T, C>::get_normalized_rank_error(k, pmf); },
138
+ py::arg("k"), py::arg("as_pmf"),
139
+ "Gets the normalized rank error given parameters k and the pmf flag.\n"
140
+ "If pmf is True, returns the 'double-sided' normalized rank error for the get_PMF() function.\n"
141
+ "Otherwise, it is the 'single-sided' normalized rank error for all the other queries.\n"
142
+ "Constants were derived as the best fit to 99 percentile empirically measured max error in thousands of trials"
143
+ )
144
+ .def("__iter__", [](const quantiles_sketch<T, C>& s) { return py::make_iterator(s.begin(), s.end()); });
145
+
146
+ add_serialization<T>(quantiles_class);
147
+ add_vector_update<T>(quantiles_class);
198
148
  }
199
149
 
200
150
  void init_quantiles(py::module &m) {
201
- bind_quantiles_sketch<int>(m, "quantiles_ints_sketch");
202
- bind_quantiles_sketch<float>(m, "quantiles_floats_sketch");
203
- bind_quantiles_sketch<double>(m, "quantiles_doubles_sketch");
151
+ bind_quantiles_sketch<int, std::less<int>>(m, "quantiles_ints_sketch");
152
+ bind_quantiles_sketch<float, std::less<float>>(m, "quantiles_floats_sketch");
153
+ bind_quantiles_sketch<double, std::less<double>>(m, "quantiles_doubles_sketch");
154
+ bind_quantiles_sketch<py::object, py_object_lt>(m, "quantiles_items_sketch");
204
155
  }