datasketches 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +6 -0
  3. data/NOTICE +1 -1
  4. data/README.md +0 -2
  5. data/ext/datasketches/cpc_wrapper.cpp +2 -2
  6. data/ext/datasketches/kll_wrapper.cpp +0 -10
  7. data/lib/datasketches/version.rb +1 -1
  8. data/lib/datasketches.rb +1 -1
  9. data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
  10. data/vendor/datasketches-cpp/CODE_OF_CONDUCT.md +3 -0
  11. data/vendor/datasketches-cpp/CONTRIBUTING.md +50 -0
  12. data/vendor/datasketches-cpp/Doxyfile +2827 -0
  13. data/vendor/datasketches-cpp/LICENSE +0 -76
  14. data/vendor/datasketches-cpp/NOTICE +1 -1
  15. data/vendor/datasketches-cpp/README.md +1 -3
  16. data/vendor/datasketches-cpp/common/CMakeLists.txt +12 -11
  17. data/vendor/datasketches-cpp/common/include/common_defs.hpp +11 -8
  18. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +0 -2
  19. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov.hpp +9 -6
  20. data/vendor/datasketches-cpp/common/include/optional.hpp +148 -0
  21. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view.hpp +95 -2
  22. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +1 -1
  23. data/vendor/datasketches-cpp/common/include/serde.hpp +69 -20
  24. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +1 -1
  25. data/vendor/datasketches-cpp/common/test/optional_test.cpp +85 -0
  26. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +14 -14
  27. data/vendor/datasketches-cpp/count/include/count_min.hpp +132 -78
  28. data/vendor/datasketches-cpp/count/include/count_min_impl.hpp +132 -152
  29. data/vendor/datasketches-cpp/count/test/CMakeLists.txt +11 -12
  30. data/vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp +61 -61
  31. data/vendor/datasketches-cpp/count/test/count_min_test.cpp +175 -178
  32. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +14 -20
  33. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +7 -4
  34. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +17 -17
  35. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +40 -40
  36. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +13 -10
  37. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +35 -11
  38. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +8 -8
  39. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -2
  40. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +5 -5
  41. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +20 -7
  42. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_deserialize_from_java_test.cpp +60 -0
  43. data/vendor/datasketches-cpp/{python/include/py_object_lt.hpp → cpc/test/cpc_sketch_serialize_for_java.cpp} +15 -14
  44. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +4 -29
  45. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +4 -4
  46. data/vendor/datasketches-cpp/density/include/density_sketch.hpp +29 -9
  47. data/vendor/datasketches-cpp/density/include/density_sketch_impl.hpp +1 -1
  48. data/vendor/datasketches-cpp/density/test/CMakeLists.txt +0 -1
  49. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +21 -9
  50. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +6 -4
  51. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +14 -1
  52. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_deserialize_from_java_test.cpp +95 -0
  53. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_serialize_for_java.cpp +83 -0
  54. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +3 -42
  55. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +2 -2
  56. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +3 -1
  57. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +3 -3
  58. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +5 -3
  59. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +4 -4
  60. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +3 -1
  61. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +0 -12
  62. data/vendor/datasketches-cpp/hll/include/hll.hpp +70 -57
  63. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +14 -1
  64. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +0 -68
  65. data/vendor/datasketches-cpp/hll/test/hll_sketch_deserialize_from_java_test.cpp +69 -0
  66. data/vendor/datasketches-cpp/hll/test/hll_sketch_serialize_for_java.cpp +52 -0
  67. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +2 -2
  68. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +71 -50
  69. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +59 -130
  70. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +14 -1
  71. data/vendor/datasketches-cpp/kll/test/kll_sketch_deserialize_from_java_test.cpp +103 -0
  72. data/vendor/datasketches-cpp/kll/test/kll_sketch_serialize_for_java.cpp +62 -0
  73. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +3 -38
  74. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +68 -51
  75. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +62 -132
  76. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +14 -1
  77. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_deserialize_from_java_test.cpp +84 -0
  78. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_serialize_for_java.cpp +52 -0
  79. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +14 -38
  80. data/vendor/datasketches-cpp/req/include/req_common.hpp +7 -3
  81. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +2 -2
  82. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +97 -23
  83. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +48 -109
  84. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +14 -1
  85. data/vendor/datasketches-cpp/req/test/req_sketch_deserialize_from_java_test.cpp +55 -0
  86. data/vendor/datasketches-cpp/{tuple/include/array_of_doubles_intersection_impl.hpp → req/test/req_sketch_serialize_for_java.cpp} +12 -7
  87. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +3 -89
  88. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +4 -0
  89. data/vendor/datasketches-cpp/sampling/include/ebpps_sample.hpp +210 -0
  90. data/vendor/datasketches-cpp/sampling/include/ebpps_sample_impl.hpp +535 -0
  91. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch.hpp +281 -0
  92. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch_impl.hpp +531 -0
  93. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +69 -26
  94. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +3 -3
  95. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +10 -11
  96. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +4 -4
  97. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +55 -8
  98. data/vendor/datasketches-cpp/sampling/test/ebpps_allocation_test.cpp +96 -0
  99. data/vendor/datasketches-cpp/sampling/test/ebpps_sample_test.cpp +137 -0
  100. data/vendor/datasketches-cpp/sampling/test/ebpps_sketch_test.cpp +266 -0
  101. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_deserialize_from_java_test.cpp +81 -0
  102. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_serialize_for_java.cpp +54 -0
  103. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +0 -37
  104. data/vendor/datasketches-cpp/sampling/test/var_opt_union_deserialize_from_java_test.cpp +50 -0
  105. data/vendor/datasketches-cpp/sampling/test/var_opt_union_serialize_for_java.cpp +56 -0
  106. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -18
  107. data/vendor/datasketches-cpp/theta/include/bit_packing.hpp +2608 -2608
  108. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
  109. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_theta_sketched_sets.hpp +7 -6
  110. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +20 -5
  111. data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +10 -4
  112. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +1 -1
  113. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +13 -5
  114. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +5 -5
  115. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +3 -3
  116. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity.hpp +2 -1
  117. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +1 -0
  118. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +1 -1
  119. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +126 -27
  120. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +8 -8
  121. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +17 -10
  122. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -1
  123. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +3 -3
  124. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +5 -2
  125. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +11 -1
  126. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +14 -1
  127. data/vendor/datasketches-cpp/theta/test/theta_sketch_deserialize_from_java_test.cpp +57 -0
  128. data/vendor/datasketches-cpp/theta/test/theta_sketch_serialize_for_java.cpp +61 -0
  129. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +0 -188
  130. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +8 -7
  131. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +19 -144
  132. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b.hpp → array_tuple_a_not_b.hpp} +24 -16
  133. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b_impl.hpp → array_tuple_a_not_b_impl.hpp} +4 -4
  134. data/vendor/datasketches-cpp/tuple/include/array_tuple_intersection.hpp +65 -0
  135. data/vendor/datasketches-cpp/{python/include/py_object_ostream.hpp → tuple/include/array_tuple_intersection_impl.hpp} +7 -24
  136. data/vendor/datasketches-cpp/tuple/include/array_tuple_sketch.hpp +237 -0
  137. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_sketch_impl.hpp → array_tuple_sketch_impl.hpp} +40 -41
  138. data/vendor/datasketches-cpp/tuple/include/array_tuple_union.hpp +81 -0
  139. data/vendor/datasketches-cpp/tuple/include/array_tuple_union_impl.hpp +43 -0
  140. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +11 -2
  141. data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +17 -10
  142. data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +2 -1
  143. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +95 -32
  144. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +19 -11
  145. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +16 -1
  146. data/vendor/datasketches-cpp/tuple/test/aod_sketch_deserialize_from_java_test.cpp +76 -0
  147. data/vendor/datasketches-cpp/tuple/test/aod_sketch_serialize_for_java.cpp +62 -0
  148. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +5 -129
  149. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +85 -89
  150. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +3 -1
  151. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_deserialize_from_java_test.cpp +47 -0
  152. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_serialize_for_java.cpp +38 -0
  153. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +1 -1
  154. data/vendor/datasketches-cpp/version.cfg.in +1 -1
  155. metadata +47 -93
  156. data/vendor/datasketches-cpp/MANIFEST.in +0 -39
  157. data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
  158. data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
  159. data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
  160. data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
  161. data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
  162. data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
  163. data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
  164. data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
  165. data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
  166. data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
  167. data/vendor/datasketches-cpp/pyproject.toml +0 -23
  168. data/vendor/datasketches-cpp/python/CMakeLists.txt +0 -87
  169. data/vendor/datasketches-cpp/python/README.md +0 -85
  170. data/vendor/datasketches-cpp/python/datasketches/DensityWrapper.py +0 -87
  171. data/vendor/datasketches-cpp/python/datasketches/KernelFunction.py +0 -35
  172. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +0 -110
  173. data/vendor/datasketches-cpp/python/datasketches/TuplePolicy.py +0 -77
  174. data/vendor/datasketches-cpp/python/datasketches/TupleWrapper.py +0 -205
  175. data/vendor/datasketches-cpp/python/datasketches/__init__.py +0 -38
  176. data/vendor/datasketches-cpp/python/include/kernel_function.hpp +0 -98
  177. data/vendor/datasketches-cpp/python/include/py_serde.hpp +0 -113
  178. data/vendor/datasketches-cpp/python/include/quantile_conditional.hpp +0 -104
  179. data/vendor/datasketches-cpp/python/include/tuple_policy.hpp +0 -136
  180. data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +0 -345
  181. data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +0 -354
  182. data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +0 -346
  183. data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +0 -463
  184. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +0 -403
  185. data/vendor/datasketches-cpp/python/pybind11Path.cmd +0 -21
  186. data/vendor/datasketches-cpp/python/src/__init__.py +0 -18
  187. data/vendor/datasketches-cpp/python/src/count_wrapper.cpp +0 -101
  188. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +0 -76
  189. data/vendor/datasketches-cpp/python/src/datasketches.cpp +0 -58
  190. data/vendor/datasketches-cpp/python/src/density_wrapper.cpp +0 -95
  191. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +0 -182
  192. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -126
  193. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +0 -158
  194. data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +0 -68
  195. data/vendor/datasketches-cpp/python/src/py_serde.cpp +0 -112
  196. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +0 -155
  197. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +0 -154
  198. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +0 -166
  199. data/vendor/datasketches-cpp/python/src/tuple_wrapper.cpp +0 -215
  200. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +0 -490
  201. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +0 -173
  202. data/vendor/datasketches-cpp/python/tests/__init__.py +0 -16
  203. data/vendor/datasketches-cpp/python/tests/count_min_test.py +0 -86
  204. data/vendor/datasketches-cpp/python/tests/cpc_test.py +0 -64
  205. data/vendor/datasketches-cpp/python/tests/density_test.py +0 -93
  206. data/vendor/datasketches-cpp/python/tests/fi_test.py +0 -149
  207. data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -129
  208. data/vendor/datasketches-cpp/python/tests/kll_test.py +0 -159
  209. data/vendor/datasketches-cpp/python/tests/quantiles_test.py +0 -160
  210. data/vendor/datasketches-cpp/python/tests/req_test.py +0 -159
  211. data/vendor/datasketches-cpp/python/tests/theta_test.py +0 -148
  212. data/vendor/datasketches-cpp/python/tests/tuple_test.py +0 -206
  213. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +0 -148
  214. data/vendor/datasketches-cpp/python/tests/vo_test.py +0 -132
  215. data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
  216. data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
  217. data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
  218. data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
  219. data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
  220. data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +0 -67
  221. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
  222. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
  223. data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
  224. data/vendor/datasketches-cpp/setup.py +0 -110
  225. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
  226. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
  227. data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
  228. data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
  229. data/vendor/datasketches-cpp/tox.ini +0 -26
  230. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +0 -52
  231. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +0 -81
  232. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +0 -43
  233. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +0 -1
  234. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
  235. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
  236. data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
  237. data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +0 -1
@@ -0,0 +1,52 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #include <catch2/catch.hpp>
21
+ #include <fstream>
22
+ #include <quantiles_sketch.hpp>
23
+
24
+ namespace datasketches {
25
+
26
+ TEST_CASE("quantiles sketch double generate", "[serialize_for_java]") {
27
+ const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
28
+ for (const unsigned n: n_arr) {
29
+ quantiles_sketch<double> sketch;
30
+ for (unsigned i = 1; i <= n; ++i) sketch.update(i);
31
+ std::ofstream os("quantiles_double_n" + std::to_string(n) + "_cpp.sk", std::ios::binary);
32
+ sketch.serialize(os);
33
+ }
34
+ }
35
+
36
+ struct compare_as_number {
37
+ bool operator()(const std::string& a, const std::string& b) const {
38
+ return std::stoi(a) < std::stoi(b);
39
+ }
40
+ };
41
+
42
+ TEST_CASE("quantiles sketch string generate", "[serialize_for_java]") {
43
+ const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
44
+ for (const unsigned n: n_arr) {
45
+ quantiles_sketch<std::string, compare_as_number> sketch;
46
+ for (unsigned i = 1; i <= n; ++i) sketch.update(std::to_string(i));
47
+ std::ofstream os("quantiles_string_n" + std::to_string(n) + "_cpp.sk", std::ios::binary);
48
+ sketch.serialize(os);
49
+ }
50
+ }
51
+
52
+ } /* namespace datasketches */
@@ -65,8 +65,6 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
65
65
  REQUIRE_THROWS_AS(sketch.get_max_item(), std::runtime_error);
66
66
  REQUIRE_THROWS_AS(sketch.get_rank(0), std::runtime_error);
67
67
  REQUIRE_THROWS_AS(sketch.get_quantile(0.5), std::runtime_error);
68
- const double fractions[3] {0, 0.5, 1};
69
- REQUIRE_THROWS_AS(sketch.get_quantiles(fractions, 3).empty(), std::runtime_error);
70
68
  const float split_points[1] {0};
71
69
  REQUIRE_THROWS_AS(sketch.get_PMF(split_points, 1), std::runtime_error);
72
70
  REQUIRE_THROWS_AS(sketch.get_CDF(split_points, 1), std::runtime_error);
@@ -98,13 +96,6 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
98
96
  REQUIRE(sketch.get_max_item() == 1.0);
99
97
  REQUIRE(sketch.get_quantile(0.5) == 1.0);
100
98
 
101
- const double fractions[3] {0, 0.5, 1};
102
- auto quantiles = sketch.get_quantiles(fractions, 3);
103
- REQUIRE(quantiles.size() == 3);
104
- REQUIRE(quantiles[0] == 1.0);
105
- REQUIRE(quantiles[1] == 1.0);
106
- REQUIRE(quantiles[2] == 1.0);
107
-
108
99
  int count = 0;
109
100
  for (auto pair: sketch) {
110
101
  REQUIRE(pair.second == 1);
@@ -153,20 +144,6 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
153
144
  REQUIRE(sketch.get_max_item() == n);
154
145
  REQUIRE(sketch.get_quantile(1) == n);
155
146
 
156
- const double ranks[3] {0, 0.5, 1};
157
- auto quantiles = sketch.get_quantiles(ranks, 3);
158
- REQUIRE(quantiles.size() == 3);
159
- REQUIRE(quantiles[0] == 1);
160
- REQUIRE(quantiles[1] == static_cast<float>(n / 2));
161
- REQUIRE(quantiles[2] == n);
162
-
163
- // the alternative method must produce the same result
164
- auto quantiles2 = sketch.get_quantiles(3);
165
- REQUIRE(quantiles2.size() == 3);
166
- REQUIRE(quantiles[0] == quantiles2[0]);
167
- REQUIRE(quantiles[1] == quantiles2[1]);
168
- REQUIRE(quantiles[2] == quantiles2[2]);
169
-
170
147
  int count = 0;
171
148
  for (auto pair: sketch) {
172
149
  REQUIRE(pair.second == 1);
@@ -283,16 +260,16 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
283
260
  // value pushed into higher level
284
261
  REQUIRE(sketch.get_rank(50, false) == Approx(0.49).margin(0.01));
285
262
  REQUIRE(sketch.get_rank(50, true) == 0.50);
286
-
263
+
287
264
  // get_quantile()
288
265
  // value still in base buffer
289
266
  REQUIRE(sketch.get_quantile(0.70, false) == 71);
290
267
  REQUIRE(sketch.get_quantile(0.70, true) == 70);
291
-
268
+
292
269
  // value pushed into higher level
293
270
  int quantile = sketch.get_quantile(0.30, false);
294
271
  if (quantile != 31 && quantile != 32) { FAIL(); }
295
-
272
+
296
273
  quantile = sketch.get_quantile(0.30, true);
297
274
  if (quantile != 29 && quantile != 30) { FAIL(); }
298
275
  }
@@ -550,7 +527,6 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
550
527
  REQUIRE(sketch1.get_quantile(0.5) == Approx(n).margin(n * RANK_EPS_FOR_K_128));
551
528
  }
552
529
 
553
-
554
530
  SECTION("merge lower k") {
555
531
  quantiles_float_sketch sketch1(256, std::less<float>(), 0);
556
532
  quantiles_float_sketch sketch2(128, std::less<float>(), 0);
@@ -653,7 +629,7 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
653
629
  for (int i = 0; i < 100 * k; i++) {
654
630
  sketch1.update(static_cast<float>(i));
655
631
  }
656
-
632
+
657
633
  sketch1.merge(sketch2);
658
634
  REQUIRE(sketch1.get_n() == 101 * k);
659
635
  REQUIRE(sketch1.get_k() == 2 * k); // no reason to have shrunk
@@ -670,7 +646,7 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
670
646
  sketch1.update(static_cast<float>(i));
671
647
  sketch2.update(static_cast<float>(i));
672
648
  }
673
-
649
+
674
650
  for (int i = 0; i < 100 * k; i++) {
675
651
  sketch2.update(static_cast<float>(i));
676
652
  }
@@ -691,7 +667,7 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
691
667
  sketch1.update(static_cast<float>(i));
692
668
  sketch2.update(static_cast<float>(-i));
693
669
  }
694
-
670
+
695
671
  sketch1.merge(sketch2);
696
672
  REQUIRE(sketch1.get_n() == 200 * k);
697
673
  REQUIRE(sketch1.get_k() == k); // no reason to have shrunk
@@ -709,7 +685,7 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
709
685
  sketch1.update(static_cast<float>(i));
710
686
  sketch2.update(static_cast<float>(k - i - 1));
711
687
  }
712
-
688
+
713
689
  for (int i = k; i < 100 * k; i++) {
714
690
  sketch2.update(static_cast<float>(i));
715
691
  }
@@ -733,7 +709,7 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
733
709
  sketch1.update(static_cast<float>(i));
734
710
  sketch2.update(static_cast<float>(2 * n - i - 1));
735
711
  }
736
-
712
+
737
713
  sketch1.merge(sketch2);
738
714
  REQUIRE(sketch1.get_n() == 2 * n);
739
715
  REQUIRE(sketch1.get_k() == k);
@@ -752,7 +728,7 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
752
728
  sketch1.update(static_cast<float>(i));
753
729
  sketch2.update(static_cast<float>(2 * n - i - 1));
754
730
  }
755
-
731
+
756
732
  sketch1.merge(sketch2);
757
733
  REQUIRE(sketch1.get_n() == 2 * n);
758
734
  REQUIRE(sketch1.get_k() == k);
@@ -909,10 +885,10 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
909
885
  const uint16_t k = 8;
910
886
  const int n = 403;
911
887
  quantiles_sketch<double> sk_double(k);
912
-
888
+
913
889
  quantiles_sketch<float> sk_float(k);
914
890
  REQUIRE(sk_float.is_empty());
915
-
891
+
916
892
  for (int i = 0; i < n; ++i) sk_double.update(i + .01);
917
893
 
918
894
  quantiles_sketch<int> sk_int(sk_double);
@@ -921,13 +897,13 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
921
897
  REQUIRE(sk_double.get_num_retained() == sk_int.get_num_retained());
922
898
 
923
899
  auto sv_double = sk_double.get_sorted_view();
924
- std::vector<std::pair<double, uint64_t>> vec_double(sv_double.begin(), sv_double.end());
900
+ std::vector<std::pair<double, uint64_t>> vec_double(sv_double.begin(), sv_double.end());
925
901
 
926
902
  auto sv_int = sk_int.get_sorted_view();
927
- std::vector<std::pair<int, uint64_t>> vec_int(sv_int.begin(), sv_int.end());
903
+ std::vector<std::pair<int, uint64_t>> vec_int(sv_int.begin(), sv_int.end());
928
904
 
929
905
  REQUIRE(vec_double.size() == vec_int.size());
930
-
906
+
931
907
  for (size_t i = 0; i < vec_int.size(); ++i) {
932
908
  // known truncation with conversion so approximate result
933
909
  REQUIRE(vec_double[i].first == Approx(vec_int[i].first).margin(0.1));
@@ -27,10 +27,14 @@
27
27
 
28
28
  namespace datasketches {
29
29
 
30
+ /// REQ sketch constants
30
31
  namespace req_constants {
31
- static const uint16_t MIN_K = 4;
32
- static const uint8_t INIT_NUM_SECTIONS = 3;
33
- static const unsigned MULTIPLIER = 2;
32
+ /// minimum value of parameter K
33
+ const uint16_t MIN_K = 4;
34
+ /// initial number of sections
35
+ const uint8_t INIT_NUM_SECTIONS = 3;
36
+ /// multiplier for nominal capacity
37
+ const unsigned MULTIPLIER = 2;
34
38
  }
35
39
 
36
40
  } /* namespace datasketches */
@@ -277,7 +277,7 @@ std::pair<uint32_t, uint32_t> req_compactor<T, C, A>::compact(req_compactor& nex
277
277
  if (compaction_range.second - compaction_range.first < 2) throw std::logic_error("compaction range error");
278
278
 
279
279
  if ((state_ & 1) == 1) { coin_ = !coin_; } // for odd flip coin;
280
- else { coin_ = random_bit(); } // random coin flip
280
+ else { coin_ = random_utils::random_bit(); } // random coin flip
281
281
 
282
282
  const auto num = (compaction_range.second - compaction_range.first) / 2;
283
283
  next.ensure_space(num);
@@ -493,7 +493,7 @@ comparator_(comparator),
493
493
  allocator_(allocator),
494
494
  lg_weight_(lg_weight),
495
495
  hra_(hra),
496
- coin_(random_bit()),
496
+ coin_(random_utils::random_bit()),
497
497
  sorted_(sorted),
498
498
  section_size_raw_(section_size_raw),
499
499
  section_size_(nearest_even(section_size_raw)),
@@ -25,9 +25,52 @@
25
25
  #include "req_common.hpp"
26
26
  #include "req_compactor.hpp"
27
27
  #include "quantiles_sorted_view.hpp"
28
+ #include "optional.hpp"
28
29
 
29
30
  namespace datasketches {
30
31
 
32
+ /**
33
+ * Relative Error Quantiles Sketch.
34
+ * This is an implementation based on the paper
35
+ * "Relative Error Streaming Quantiles" by Graham Cormode, Zohar Karnin, Edo Liberty,
36
+ * Justin Thaler, Pavel Veselý, and loosely derived from a Python prototype written by Pavel Veselý.
37
+ *
38
+ * <p>Reference: https://arxiv.org/abs/2004.01668</p>
39
+ *
40
+ * <p>This implementation differs from the algorithm described in the paper in the following:</p>
41
+ *
42
+ * <ul>
43
+ * <li>The algorithm requires no upper bound on the stream length.
44
+ * Instead, each relative-compactor counts the number of compaction operations performed
45
+ * so far (via variable state). Initially, the relative-compactor starts with INIT_NUM_SECTIONS.
46
+ * Each time the number of compactions (variable state) exceeds 2^{numSections - 1}, we double
47
+ * numSections. Note that after merging the sketch with another one variable state may not correspond
48
+ * to the number of compactions performed at a particular level, however, since the state variable
49
+ * never exceeds the number of compactions, the guarantees of the sketch remain valid.</li>
50
+ *
51
+ * <li>The size of each section (variable k and section_size in the code and parameter k in
52
+ * the paper) is initialized with a number set by the user via variable k.
53
+ * When the number of sections doubles, we decrease section_size by a factor of sqrt(2).
54
+ * This is applied at each level separately. Thus, when we double the number of sections, the
55
+ * nominal compactor size increases by a factor of approx. sqrt(2) (+/- rounding).</li>
56
+ *
57
+ * <li>The merge operation here does not perform "special compactions", which are used in the paper
58
+ * to allow for a tight mathematical analysis of the sketch.</li>
59
+ * </ul>
60
+ *
61
+ * <p>This implementation provides a number of capabilities not discussed in the paper or provided
62
+ * in the Python prototype.</p>
63
+ *
64
+ * <ul><li>The Python prototype only implemented high accuracy for low ranks. This implementation
65
+ * provides the user with the ability to choose either high rank accuracy or low rank accuracy at
66
+ * the time of sketch construction.</li>
67
+ * <li>The Python prototype only implemented a comparison criterion of "INCLUSIVE". This implementation
68
+ * allows the user to use both the "INCLUSIVE" criterion and the "EXCLUSIVE" criterion.</li>
69
+ * <li>This implementation provides extensive debug visibility into the operation of the sketch with
70
+ * two levels of detail output. This is not only useful for debugging, but is a powerful tool to
71
+ * help users understand how the sketch works.</li>
72
+ * </ul>
73
+ */
31
74
  template<
32
75
  typename T,
33
76
  typename Comparator = std::less<T>, // strict weak ordering function (see C++ named requirements: Compare)
@@ -37,8 +80,16 @@ class req_sketch {
37
80
  public:
38
81
  using value_type = T;
39
82
  using comparator = Comparator;
83
+ using allocator_type = Allocator;
40
84
  using Compactor = req_compactor<T, Comparator, Allocator>;
41
85
  using AllocCompactor = typename std::allocator_traits<Allocator>::template rebind_alloc<Compactor>;
86
+ using vector_double = typename quantiles_sorted_view<T, Comparator, Allocator>::vector_double;
87
+
88
+ /**
89
+ * Quantile return type.
90
+ * This is to return quantiles either by value (for arithmetic types) or by const reference (for all other types).
91
+ */
92
+ using quantile_return_type = typename quantiles_sorted_view<T, Comparator, Allocator>::quantile_return_type;
42
93
 
43
94
  /**
44
95
  * Constructor
@@ -46,19 +97,41 @@ public:
46
97
  * Value of 12 roughly corresponds to 1% relative error guarantee at 95% confidence.
47
98
  * @param hra if true, the default, the high ranks are prioritized for better
48
99
  * accuracy. Otherwise the low ranks are prioritized for better accuracy.
49
- * @param comparator to use by this instance
50
- * @param allocator to use by this instance
100
+ * @param comparator strict weak ordering function (see C++ named requirements: Compare)
101
+ * @param allocator used by this sketch to allocate memory
51
102
  */
52
103
  explicit req_sketch(uint16_t k, bool hra = true, const Comparator& comparator = Comparator(),
53
104
  const Allocator& allocator = Allocator());
54
105
 
55
- ~req_sketch();
106
+ /**
107
+ * Copy constructor
108
+ * @param other sketch to be copied
109
+ */
56
110
  req_sketch(const req_sketch& other);
111
+
112
+ /**
113
+ * Move constructor
114
+ * @param other sketch to be moved
115
+ */
57
116
  req_sketch(req_sketch&& other) noexcept;
117
+
118
+ ~req_sketch();
119
+
120
+ /**
121
+ * Copy assignment
122
+ * @param other sketch to be copied
123
+ * @return reference to this sketch
124
+ */
58
125
  req_sketch& operator=(const req_sketch& other);
126
+
127
+ /**
128
+ * Move assignment
129
+ * @param other sketch to be moved
130
+ * @return reference to this sketch
131
+ */
59
132
  req_sketch& operator=(req_sketch&& other);
60
133
 
61
- /*
134
+ /**
62
135
  * Type converting constructor.
63
136
  * @param other sketch of a different type
64
137
  * @param comparator instance of a Comparator
@@ -177,7 +250,6 @@ public:
177
250
  * @return an array of m+1 doubles each of which is an approximation
178
251
  * to the fraction of the input stream items (the mass) that fall into one of those intervals.
179
252
  */
180
- using vector_double = typename quantiles_sorted_view<T, Comparator, Allocator>::vector_double;
181
253
  vector_double get_PMF(const T* split_points, uint32_t size, bool inclusive = true) const;
182
254
 
183
255
  /**
@@ -214,22 +286,8 @@ public:
214
286
  *
215
287
  * @return approximate quantile associated with the given rank
216
288
  */
217
- using quantile_return_type = typename quantiles_sorted_view<T, Comparator, Allocator>::quantile_return_type;
218
289
  quantile_return_type get_quantile(double rank, bool inclusive = true) const;
219
290
 
220
- /**
221
- * Returns an array of quantiles that correspond to the given array of normalized ranks.
222
- * <p>If the sketch is empty this throws std::runtime_error.
223
- *
224
- * @param ranks given array of normalized ranks.
225
- * @param size the number of ranks in the array.
226
- *
227
- * @return array of quantiles that correspond to the given array of normalized ranks
228
- *
229
- * Deprecated. Will be removed in the next major version. Use get_quantile() instead.
230
- */
231
- std::vector<T, Allocator> get_quantiles(const double* ranks, uint32_t size, bool inclusive = true) const;
232
-
233
291
  /**
234
292
  * Returns an approximate lower bound of the given normalized rank.
235
293
  * @param rank the given rank, a value between 0 and 1.0.
@@ -333,9 +391,26 @@ public:
333
391
  string<Allocator> to_string(bool print_levels = false, bool print_items = false) const;
334
392
 
335
393
  class const_iterator;
394
+
395
+ /**
396
+ * Iterator pointing to the first item in the sketch.
397
+ * If the sketch is empty, the returned iterator must not be dereferenced or incremented.
398
+ * @return iterator pointing to the first item in the sketch
399
+ */
336
400
  const_iterator begin() const;
401
+
402
+ /**
403
+ * Iterator pointing to the past-the-end item in the sketch.
404
+ * The past-the-end item is the hypothetical item that would follow the last item.
405
+ * It does not point to any item, and must not be dereferenced or incremented.
406
+ * @return iterator pointing to the past-the-end item in the sketch
407
+ */
337
408
  const_iterator end() const;
338
409
 
410
+ /**
411
+ * Gets the sorted view of this sketch
412
+ * @return the sorted view of this sketch
413
+ */
339
414
  quantiles_sorted_view<T, Comparator, Allocator> get_sorted_view() const;
340
415
 
341
416
  private:
@@ -347,8 +422,8 @@ private:
347
422
  uint32_t num_retained_;
348
423
  uint64_t n_;
349
424
  std::vector<Compactor, AllocCompactor> compactors_;
350
- T* min_item_;
351
- T* max_item_;
425
+ optional<T> min_item_;
426
+ optional<T> max_item_;
352
427
  mutable quantiles_sorted_view<T, Comparator, Allocator>* sorted_view_;
353
428
 
354
429
  void setup_sorted_view() const; // modifies mutable state
@@ -375,9 +450,8 @@ private:
375
450
  static bool is_exact_rank(uint16_t k, uint8_t num_levels, double rank, uint64_t n, bool hra);
376
451
 
377
452
  // for deserialization
378
- class item_deleter;
379
453
  req_sketch(uint16_t k, bool hra, uint64_t n,
380
- std::unique_ptr<T, item_deleter> min_item, std::unique_ptr<T, item_deleter> max_item,
454
+ optional<T>&& min_item, optional<T>&& max_item,
381
455
  std::vector<Compactor, AllocCompactor>&& compactors, const Comparator& comparator);
382
456
 
383
457
  static void check_preamble_ints(uint8_t preamble_ints, uint8_t num_levels);