datasketches 0.3.1 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (245) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/NOTICE +1 -1
  4. data/README.md +0 -2
  5. data/ext/datasketches/cpc_wrapper.cpp +3 -3
  6. data/ext/datasketches/kll_wrapper.cpp +0 -10
  7. data/lib/datasketches/version.rb +1 -1
  8. data/lib/datasketches.rb +1 -1
  9. data/vendor/datasketches-cpp/CMakeLists.txt +23 -20
  10. data/vendor/datasketches-cpp/CODE_OF_CONDUCT.md +3 -0
  11. data/vendor/datasketches-cpp/CONTRIBUTING.md +50 -0
  12. data/vendor/datasketches-cpp/Doxyfile +2827 -0
  13. data/vendor/datasketches-cpp/LICENSE +0 -76
  14. data/vendor/datasketches-cpp/README.md +1 -3
  15. data/vendor/datasketches-cpp/common/CMakeLists.txt +12 -11
  16. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +25 -27
  17. data/vendor/datasketches-cpp/common/include/common_defs.hpp +15 -10
  18. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +11 -2
  19. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov.hpp +9 -6
  20. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +5 -4
  21. data/vendor/datasketches-cpp/common/include/optional.hpp +148 -0
  22. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view.hpp +95 -2
  23. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +1 -1
  24. data/vendor/datasketches-cpp/common/include/serde.hpp +69 -20
  25. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +2 -2
  26. data/vendor/datasketches-cpp/common/test/integration_test.cpp +6 -0
  27. data/vendor/datasketches-cpp/common/test/optional_test.cpp +85 -0
  28. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +14 -14
  29. data/vendor/datasketches-cpp/{python/src/__init__.py → count/CMakeLists.txt} +25 -1
  30. data/vendor/datasketches-cpp/count/include/count_min.hpp +405 -0
  31. data/vendor/datasketches-cpp/count/include/count_min_impl.hpp +497 -0
  32. data/vendor/datasketches-cpp/{MANIFEST.in → count/test/CMakeLists.txt} +23 -20
  33. data/vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp +155 -0
  34. data/vendor/datasketches-cpp/count/test/count_min_test.cpp +303 -0
  35. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +14 -20
  36. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +7 -4
  37. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +17 -17
  38. data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +3 -3
  39. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +40 -40
  40. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +14 -11
  41. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +35 -11
  42. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +8 -8
  43. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +16 -8
  44. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -2
  45. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +5 -5
  46. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +20 -7
  47. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_deserialize_from_java_test.cpp +60 -0
  48. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_serialize_for_java.cpp +38 -0
  49. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +4 -29
  50. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +4 -4
  51. data/vendor/datasketches-cpp/{tox.ini → density/CMakeLists.txt} +24 -8
  52. data/vendor/datasketches-cpp/density/include/density_sketch.hpp +256 -0
  53. data/vendor/datasketches-cpp/density/include/density_sketch_impl.hpp +543 -0
  54. data/vendor/datasketches-cpp/{python/datasketches/__init__.py → density/test/CMakeLists.txt} +15 -3
  55. data/vendor/datasketches-cpp/density/test/density_sketch_test.cpp +244 -0
  56. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +21 -9
  57. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +6 -4
  58. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +9 -3
  59. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +14 -1
  60. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_deserialize_from_java_test.cpp +95 -0
  61. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_serialize_for_java.cpp +83 -0
  62. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +3 -42
  63. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +2 -2
  64. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +3 -1
  65. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +19 -11
  66. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +2 -5
  67. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +19 -7
  68. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +1 -1
  69. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +98 -42
  70. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +2 -0
  71. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +94 -61
  72. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +20 -8
  73. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +4 -4
  74. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +3 -1
  75. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +3 -21
  76. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +8 -0
  77. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +14 -18
  78. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +1 -1
  79. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +8 -2
  80. data/vendor/datasketches-cpp/hll/include/hll.hpp +79 -65
  81. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +14 -1
  82. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +7 -1
  83. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +0 -68
  84. data/vendor/datasketches-cpp/hll/test/hll_sketch_deserialize_from_java_test.cpp +69 -0
  85. data/vendor/datasketches-cpp/hll/test/hll_sketch_serialize_for_java.cpp +52 -0
  86. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -1
  87. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +2 -2
  88. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +79 -53
  89. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +61 -132
  90. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +14 -1
  91. data/vendor/datasketches-cpp/kll/test/kll_sketch_deserialize_from_java_test.cpp +103 -0
  92. data/vendor/datasketches-cpp/kll/test/kll_sketch_serialize_for_java.cpp +62 -0
  93. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +5 -40
  94. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +76 -54
  95. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +66 -136
  96. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +14 -1
  97. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_deserialize_from_java_test.cpp +84 -0
  98. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_serialize_for_java.cpp +52 -0
  99. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +15 -39
  100. data/vendor/datasketches-cpp/req/include/req_common.hpp +7 -3
  101. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +2 -4
  102. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +105 -26
  103. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +50 -111
  104. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +14 -1
  105. data/vendor/datasketches-cpp/req/test/req_sketch_deserialize_from_java_test.cpp +55 -0
  106. data/vendor/datasketches-cpp/{tuple/include/array_of_doubles_intersection_impl.hpp → req/test/req_sketch_serialize_for_java.cpp} +12 -7
  107. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +3 -89
  108. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +4 -0
  109. data/vendor/datasketches-cpp/sampling/include/ebpps_sample.hpp +210 -0
  110. data/vendor/datasketches-cpp/sampling/include/ebpps_sample_impl.hpp +535 -0
  111. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch.hpp +281 -0
  112. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch_impl.hpp +531 -0
  113. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +89 -32
  114. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +33 -19
  115. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +13 -10
  116. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +23 -19
  117. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +55 -8
  118. data/vendor/datasketches-cpp/sampling/test/ebpps_allocation_test.cpp +96 -0
  119. data/vendor/datasketches-cpp/sampling/test/ebpps_sample_test.cpp +137 -0
  120. data/vendor/datasketches-cpp/sampling/test/ebpps_sketch_test.cpp +266 -0
  121. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_deserialize_from_java_test.cpp +81 -0
  122. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_serialize_for_java.cpp +54 -0
  123. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +33 -51
  124. data/vendor/datasketches-cpp/sampling/test/var_opt_union_deserialize_from_java_test.cpp +50 -0
  125. data/vendor/datasketches-cpp/sampling/test/var_opt_union_serialize_for_java.cpp +56 -0
  126. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -20
  127. data/vendor/datasketches-cpp/theta/CMakeLists.txt +1 -0
  128. data/vendor/datasketches-cpp/theta/include/bit_packing.hpp +6279 -0
  129. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
  130. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_theta_sketched_sets.hpp +7 -6
  131. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +14 -8
  132. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +60 -46
  133. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +20 -5
  134. data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +10 -4
  135. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +3 -1
  136. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +13 -5
  137. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +5 -5
  138. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +3 -3
  139. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity.hpp +2 -1
  140. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +1 -0
  141. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +1 -1
  142. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +180 -33
  143. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +430 -130
  144. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +17 -10
  145. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +10 -10
  146. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +3 -3
  147. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +21 -6
  148. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +13 -3
  149. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +15 -1
  150. data/vendor/datasketches-cpp/theta/test/bit_packing_test.cpp +80 -0
  151. data/vendor/datasketches-cpp/theta/test/theta_sketch_deserialize_from_java_test.cpp +57 -0
  152. data/vendor/datasketches-cpp/theta/test/theta_sketch_serialize_for_java.cpp +61 -0
  153. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +39 -188
  154. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +25 -0
  155. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +8 -7
  156. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +19 -144
  157. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b.hpp → array_tuple_a_not_b.hpp} +24 -16
  158. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b_impl.hpp → array_tuple_a_not_b_impl.hpp} +4 -4
  159. data/vendor/datasketches-cpp/tuple/include/array_tuple_intersection.hpp +65 -0
  160. data/vendor/datasketches-cpp/tuple/include/array_tuple_intersection_impl.hpp +31 -0
  161. data/vendor/datasketches-cpp/tuple/include/array_tuple_sketch.hpp +237 -0
  162. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_sketch_impl.hpp → array_tuple_sketch_impl.hpp} +40 -41
  163. data/vendor/datasketches-cpp/tuple/include/array_tuple_union.hpp +81 -0
  164. data/vendor/datasketches-cpp/tuple/include/array_tuple_union_impl.hpp +43 -0
  165. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +11 -2
  166. data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +17 -10
  167. data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +2 -1
  168. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +95 -32
  169. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +2 -1
  170. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +19 -11
  171. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +16 -1
  172. data/vendor/datasketches-cpp/tuple/test/aod_sketch_deserialize_from_java_test.cpp +76 -0
  173. data/vendor/datasketches-cpp/tuple/test/aod_sketch_serialize_for_java.cpp +62 -0
  174. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +5 -129
  175. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +85 -89
  176. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +3 -1
  177. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_deserialize_from_java_test.cpp +47 -0
  178. data/vendor/datasketches-cpp/{python/src/datasketches.cpp → tuple/test/tuple_sketch_serialize_for_java.cpp} +16 -30
  179. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +1 -1
  180. data/vendor/datasketches-cpp/version.cfg.in +1 -1
  181. metadata +61 -79
  182. data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
  183. data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
  184. data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
  185. data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
  186. data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
  187. data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
  188. data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
  189. data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
  190. data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
  191. data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
  192. data/vendor/datasketches-cpp/pyproject.toml +0 -23
  193. data/vendor/datasketches-cpp/python/CMakeLists.txt +0 -81
  194. data/vendor/datasketches-cpp/python/README.md +0 -85
  195. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +0 -104
  196. data/vendor/datasketches-cpp/python/include/py_serde.hpp +0 -113
  197. data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +0 -345
  198. data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +0 -354
  199. data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +0 -346
  200. data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +0 -463
  201. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +0 -403
  202. data/vendor/datasketches-cpp/python/pybind11Path.cmd +0 -21
  203. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +0 -90
  204. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +0 -128
  205. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -134
  206. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +0 -210
  207. data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +0 -68
  208. data/vendor/datasketches-cpp/python/src/py_serde.cpp +0 -111
  209. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +0 -204
  210. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +0 -215
  211. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +0 -172
  212. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +0 -490
  213. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +0 -173
  214. data/vendor/datasketches-cpp/python/tests/__init__.py +0 -16
  215. data/vendor/datasketches-cpp/python/tests/cpc_test.py +0 -64
  216. data/vendor/datasketches-cpp/python/tests/fi_test.py +0 -110
  217. data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -130
  218. data/vendor/datasketches-cpp/python/tests/kll_test.py +0 -125
  219. data/vendor/datasketches-cpp/python/tests/quantiles_test.py +0 -126
  220. data/vendor/datasketches-cpp/python/tests/req_test.py +0 -126
  221. data/vendor/datasketches-cpp/python/tests/theta_test.py +0 -146
  222. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +0 -148
  223. data/vendor/datasketches-cpp/python/tests/vo_test.py +0 -125
  224. data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
  225. data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
  226. data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
  227. data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
  228. data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
  229. data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +0 -67
  230. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
  231. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
  232. data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
  233. data/vendor/datasketches-cpp/setup.py +0 -110
  234. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
  235. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
  236. data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
  237. data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
  238. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +0 -52
  239. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +0 -81
  240. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +0 -43
  241. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +0 -1
  242. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
  243. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
  244. data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
  245. data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +0 -1
@@ -27,9 +27,20 @@
27
27
  #include "quantiles_sorted_view.hpp"
28
28
  #include "common_defs.hpp"
29
29
  #include "serde.hpp"
30
+ #include "optional.hpp"
30
31
 
31
32
  namespace datasketches {
32
33
 
34
+ /// Constants for Quantiles sketch
35
+ namespace quantiles_constants {
36
+ /// default value of parameter K
37
+ const uint16_t DEFAULT_K = 128;
38
+ /// minimum value of parameter K
39
+ const uint16_t MIN_K = 2;
40
+ /// maximum value of parameter K
41
+ const uint16_t MAX_K = 1 << 15;
42
+ }
43
+
33
44
  /**
34
45
  * This is a stochastic streaming sketch that enables near-real time analysis of the
35
46
  * approximate distribution from a very large stream in a single pass.
@@ -136,13 +147,6 @@ Table Guide for DoublesSketch Size in Bytes and Approximate Error:
136
147
  * @author Alexander Saydakov
137
148
  * @author Jon Malkin
138
149
  */
139
-
140
- namespace quantiles_constants {
141
- const uint16_t DEFAULT_K = 128;
142
- const uint16_t MIN_K = 2;
143
- const uint16_t MAX_K = 1 << 15;
144
- }
145
-
146
150
  template <typename T,
147
151
  typename Comparator = std::less<T>, // strict weak ordering function (see C++ named requirements: Compare)
148
152
  typename Allocator = std::allocator<T>>
@@ -151,13 +155,43 @@ public:
151
155
  using value_type = T;
152
156
  using allocator_type = Allocator;
153
157
  using comparator = Comparator;
158
+ using quantile_return_type = typename quantiles_sorted_view<T, Comparator, Allocator>::quantile_return_type;
159
+ using vector_double = typename quantiles_sorted_view<T, Comparator, Allocator>::vector_double;
154
160
 
161
+ /**
162
+ * Constructor
163
+ * @param k affects the size of the sketch and its estimation error
164
+ * @param comparator strict weak ordering function (see C++ named requirements: Compare)
165
+ * @param allocator used to allocate memory
166
+ */
155
167
  explicit quantiles_sketch(uint16_t k = quantiles_constants::DEFAULT_K,
156
168
  const Comparator& comparator = Comparator(), const Allocator& allocator = Allocator());
169
+
170
+ /**
171
+ * Copy constructor
172
+ * @param other sketch to be copied
173
+ */
157
174
  quantiles_sketch(const quantiles_sketch& other);
175
+
176
+ /** Move constructor
177
+ * @param other sketch to be moved
178
+ */
158
179
  quantiles_sketch(quantiles_sketch&& other) noexcept;
180
+
159
181
  ~quantiles_sketch();
182
+
183
+ /**
184
+ * Copy assignment
185
+ * @param other sketch to be copied
186
+ * @return reference to this sketch
187
+ */
160
188
  quantiles_sketch& operator=(const quantiles_sketch& other);
189
+
190
+ /**
191
+ * Move assignment
192
+ * @param other sketch to be moved
193
+ * @return reference to this sketch
194
+ */
161
195
  quantiles_sketch& operator=(quantiles_sketch&& other) noexcept;
162
196
 
163
197
  /**
@@ -247,48 +281,13 @@ public:
247
281
  * If the sketch is empty this throws std::runtime_error.
248
282
  *
249
283
  * @param rank the specified normalized rank in the hypothetical sorted stream.
250
- *
284
+ * @param inclusive if true the weight of the given item is included into the rank.
285
+ * Otherwise the rank equals the sum of the weights of all items that are less than the given item
286
+ * according to the Comparator.
251
287
  * @return the approximation to the item at the given rank
252
288
  */
253
- using quantile_return_type = typename quantiles_sorted_view<T, Comparator, Allocator>::quantile_return_type;
254
289
  quantile_return_type get_quantile(double rank, bool inclusive = true) const;
255
290
 
256
- /**
257
- * This is a multiple-query version of get_quantile().
258
- * <p>
259
- * This returns an array that could have been generated by using get_quantile() for each
260
- * normalized rank separately.
261
- *
262
- * <p>If the sketch is empty this throws std::runtime_error.
263
- *
264
- * @param ranks given array of normalized ranks in the hypothetical sorted stream.
265
- * These ranks must be in the interval [0.0, 1.0], inclusive.
266
- * @param size the number of ranks in the array
267
- *
268
- * @return array of approximations to items associated with given ranks in the same order as given ranks
269
- * in the input array.
270
- *
271
- * Deprecated. Will be removed in the next major version. Use get_quantile() instead.
272
- */
273
- std::vector<T, Allocator> get_quantiles(const double* ranks, uint32_t size, bool inclusive = true) const;
274
-
275
- /**
276
- * This is a multiple-query version of get_quantile() that allows the caller to
277
- * specify the number of evenly-spaced normalized ranks.
278
- *
279
- * <p>If the sketch is empty this throws std::runtime_error.
280
- *
281
- * @param num an integer that specifies the number of evenly-spaced ranks.
282
- * This must be an integer greater than 0. A value of 1 is equivalent to get_quantiles([0]).
283
- * A value of 2 is equivalent to get_quantiles([0, 1]). A value of 3 is equivalent to
284
- * get_quantiles([0, 0.5, 1]), etc.
285
- *
286
- * @return array of approximations to items associated with the given number of evenly-spaced normalized ranks.
287
- *
288
- * Deprecated. Will be removed in the next major version. Use get_quantile() instead.
289
- */
290
- std::vector<T, Allocator> get_quantiles(uint32_t num, bool inclusive = true) const;
291
-
292
291
  /**
293
292
  * Returns an approximation to the normalized rank of the given item from 0 to 1, inclusive.
294
293
  *
@@ -300,7 +299,7 @@ public:
300
299
  * @param item to be ranked
301
300
  * @param inclusive if true the weight of the given item is included into the rank.
302
301
  * Otherwise the rank equals the sum of the weights of all items that are less than the given item
303
- * according to the comparator C.
302
+ * according to the Comparator.
304
303
  * @return an approximate normalized rank of the given item
305
304
  */
306
305
  double get_rank(const T& item, bool inclusive = true) const;
@@ -327,7 +326,6 @@ public:
327
326
  * @return an array of m+1 doubles each of which is an approximation
328
327
  * to the fraction of the input stream items (the mass) that fall into one of those intervals.
329
328
  */
330
- using vector_double = typename quantiles_sorted_view<T, Comparator, Allocator>::vector_double;
331
329
  vector_double get_PMF(const T* split_points, uint32_t size, bool inclusive = true) const;
332
330
 
333
331
  /**
@@ -451,9 +449,26 @@ public:
451
449
  string<Allocator> to_string(bool print_levels = false, bool print_items = false) const;
452
450
 
453
451
  class const_iterator;
452
+
453
+ /**
454
+ * Iterator pointing to the first item in the sketch.
455
+ * If the sketch is empty, the returned iterator must not be dereferenced or incremented.
456
+ * @return iterator pointing to the first item in the sketch
457
+ */
454
458
  const_iterator begin() const;
459
+
460
+ /**
461
+ * Iterator pointing to the past-the-end item in the sketch.
462
+ * The past-the-end item is the hypothetical item that would follow the last item.
463
+ * It does not point to any item, and must not be dereferenced or incremented.
464
+ * @return iterator pointing to the past-the-end item in the sketch
465
+ */
455
466
  const_iterator end() const;
456
467
 
468
+ /**
469
+ * Gets the sorted view of this sketch
470
+ * @return the sorted view of this sketch
471
+ */
457
472
  quantiles_sorted_view<T, Comparator, Allocator> get_sorted_view() const;
458
473
 
459
474
  private:
@@ -493,19 +508,18 @@ private:
493
508
  uint64_t bit_pattern_;
494
509
  Level base_buffer_;
495
510
  VectorLevels levels_;
496
- T* min_item_;
497
- T* max_item_;
511
+ optional<T> min_item_;
512
+ optional<T> max_item_;
498
513
  mutable quantiles_sorted_view<T, Comparator, Allocator>* sorted_view_;
499
514
 
500
515
  void setup_sorted_view() const; // modifies mutable state
501
516
  void reset_sorted_view();
502
517
 
503
518
  // for deserialization
504
- class item_deleter;
505
519
  class items_deleter;
506
520
  quantiles_sketch(uint16_t k, uint64_t n, uint64_t bit_pattern,
507
521
  Level&& base_buffer, VectorLevels&& levels,
508
- std::unique_ptr<T, item_deleter> min_item, std::unique_ptr<T, item_deleter> max_item,
522
+ optional<T>&& min_item, optional<T>&& max_item,
509
523
  bool is_sorted, const Comparator& comparator = Comparator(), const Allocator& allocator = Allocator());
510
524
 
511
525
  void grow_base_buffer();
@@ -576,19 +590,27 @@ private:
576
590
  static inline bool check_update_item(TT) {
577
591
  return true;
578
592
  }
593
+
594
+ // for type converting constructor
595
+ template<typename From, typename FC, typename FA> friend class quantiles_sketch;
579
596
  };
580
597
 
581
598
 
582
599
  template<typename T, typename C, typename A>
583
- class quantiles_sketch<T, C, A>::const_iterator: public std::iterator<std::input_iterator_tag, T> {
600
+ class quantiles_sketch<T, C, A>::const_iterator {
584
601
  public:
602
+ using iterator_category = std::input_iterator_tag;
585
603
  using value_type = std::pair<const T&, const uint64_t>;
604
+ using difference_type = void;
605
+ using pointer = const return_value_holder<value_type>;
606
+ using reference = const value_type;
607
+
586
608
  const_iterator& operator++();
587
609
  const_iterator& operator++(int);
588
610
  bool operator==(const const_iterator& other) const;
589
611
  bool operator!=(const const_iterator& other) const;
590
- const value_type operator*() const;
591
- const return_value_holder<value_type> operator->() const;
612
+ reference operator*() const;
613
+ pointer operator->() const;
592
614
  private:
593
615
  friend class quantiles_sketch<T, C, A>;
594
616
  using Level = std::vector<T, A>;
@@ -41,8 +41,8 @@ n_(0),
41
41
  bit_pattern_(0),
42
42
  base_buffer_(allocator_),
43
43
  levels_(allocator_),
44
- min_item_(nullptr),
45
- max_item_(nullptr),
44
+ min_item_(),
45
+ max_item_(),
46
46
  sorted_view_(nullptr)
47
47
  {
48
48
  check_k(k_);
@@ -59,12 +59,10 @@ n_(other.n_),
59
59
  bit_pattern_(other.bit_pattern_),
60
60
  base_buffer_(other.base_buffer_),
61
61
  levels_(other.levels_),
62
- min_item_(nullptr),
63
- max_item_(nullptr),
62
+ min_item_(other.min_item_),
63
+ max_item_(other.max_item_),
64
64
  sorted_view_(nullptr)
65
65
  {
66
- if (other.min_item_ != nullptr) min_item_ = new (allocator_.allocate(1)) T(*other.min_item_);
67
- if (other.max_item_ != nullptr) max_item_ = new (allocator_.allocate(1)) T(*other.max_item_);
68
66
  for (size_t i = 0; i < levels_.size(); ++i) {
69
67
  if (levels_[i].capacity() != other.levels_[i].capacity()) {
70
68
  levels_[i].reserve(other.levels_[i].capacity());
@@ -82,13 +80,10 @@ n_(other.n_),
82
80
  bit_pattern_(other.bit_pattern_),
83
81
  base_buffer_(std::move(other.base_buffer_)),
84
82
  levels_(std::move(other.levels_)),
85
- min_item_(other.min_item_),
86
- max_item_(other.max_item_),
83
+ min_item_(std::move(other.min_item_)),
84
+ max_item_(std::move(other.max_item_)),
87
85
  sorted_view_(nullptr)
88
- {
89
- other.min_item_ = nullptr;
90
- other.max_item_ = nullptr;
91
- }
86
+ {}
92
87
 
93
88
  template<typename T, typename C, typename A>
94
89
  quantiles_sketch<T, C, A>& quantiles_sketch<T, C, A>::operator=(const quantiles_sketch& other) {
@@ -126,7 +121,7 @@ quantiles_sketch<T, C, A>& quantiles_sketch<T, C, A>::operator=(quantiles_sketch
126
121
  template<typename T, typename C, typename A>
127
122
  quantiles_sketch<T, C, A>::quantiles_sketch(uint16_t k, uint64_t n, uint64_t bit_pattern,
128
123
  Level&& base_buffer, VectorLevels&& levels,
129
- std::unique_ptr<T, item_deleter> min_item, std::unique_ptr<T, item_deleter> max_item,
124
+ optional<T>&& min_item, optional<T>&& max_item,
130
125
  bool is_sorted, const C& comparator, const A& allocator):
131
126
  comparator_(comparator),
132
127
  allocator_(allocator),
@@ -136,13 +131,13 @@ n_(n),
136
131
  bit_pattern_(bit_pattern),
137
132
  base_buffer_(std::move(base_buffer)),
138
133
  levels_(std::move(levels)),
139
- min_item_(min_item.release()),
140
- max_item_(max_item.release()),
134
+ min_item_(std::move(min_item)),
135
+ max_item_(std::move(max_item)),
141
136
  sorted_view_(nullptr)
142
137
  {
143
- uint32_t item_count = base_buffer_.size();
138
+ uint32_t item_count = static_cast<uint32_t>(base_buffer_.size());
144
139
  for (Level& lvl : levels_) {
145
- item_count += lvl.size();
140
+ item_count += static_cast<uint32_t>(lvl.size());
146
141
  }
147
142
  if (item_count != compute_retained_items(k_, n_))
148
143
  throw std::logic_error("Item count does not match value computed from k, n");
@@ -160,8 +155,8 @@ n_(other.get_n()),
160
155
  bit_pattern_(compute_bit_pattern(other.get_k(), other.get_n())),
161
156
  base_buffer_(allocator),
162
157
  levels_(allocator),
163
- min_item_(nullptr),
164
- max_item_(nullptr),
158
+ min_item_(other.min_item_),
159
+ max_item_(other.max_item_),
165
160
  sorted_view_(nullptr)
166
161
  {
167
162
  static_assert(std::is_constructible<T, From>::value,
@@ -170,9 +165,6 @@ sorted_view_(nullptr)
170
165
  base_buffer_.reserve(2 * std::min(quantiles_constants::MIN_K, k_));
171
166
 
172
167
  if (!other.is_empty()) {
173
- min_item_ = new (allocator_.allocate(1)) T(other.get_min_item());
174
- max_item_ = new (allocator_.allocate(1)) T(other.get_max_item());
175
-
176
168
  // reserve space in levels
177
169
  const uint8_t num_levels = compute_levels_needed(k_, n_);
178
170
  levels_.reserve(num_levels);
@@ -212,14 +204,6 @@ sorted_view_(nullptr)
212
204
 
213
205
  template<typename T, typename C, typename A>
214
206
  quantiles_sketch<T, C, A>::~quantiles_sketch() {
215
- if (min_item_ != nullptr) {
216
- min_item_->~T();
217
- allocator_.deallocate(min_item_, 1);
218
- }
219
- if (max_item_ != nullptr) {
220
- max_item_->~T();
221
- allocator_.deallocate(max_item_, 1);
222
- }
223
207
  reset_sorted_view();
224
208
  }
225
209
 
@@ -228,8 +212,8 @@ template<typename FwdT>
228
212
  void quantiles_sketch<T, C, A>::update(FwdT&& item) {
229
213
  if (!check_update_item(item)) { return; }
230
214
  if (is_empty()) {
231
- min_item_ = new (allocator_.allocate(1)) T(item);
232
- max_item_ = new (allocator_.allocate(1)) T(item);
215
+ min_item_.emplace(item);
216
+ max_item_.emplace(item);
233
217
  } else {
234
218
  if (comparator_(item, *min_item_)) *min_item_ = item;
235
219
  if (comparator_(*max_item_, item)) *max_item_ = item;
@@ -263,17 +247,17 @@ void quantiles_sketch<T, C, A>::merge(FwdSk&& other) {
263
247
  // other has data and is in estimation mode
264
248
  if (is_estimation_mode()) {
265
249
  if (k_ == other.get_k()) {
266
- standard_merge(*this, other);
250
+ standard_merge(*this, std::forward<FwdSk>(other));
267
251
  } else if (k_ > other.get_k()) {
268
- quantiles_sketch sk_copy(other);
269
- downsampling_merge(sk_copy, *this);
270
- *this = sk_copy;
252
+ quantiles_sketch sk_copy(std::forward<FwdSk>(other));
253
+ downsampling_merge(sk_copy, std::move(*this));
254
+ *this = std::move(sk_copy);
271
255
  } else { // k_ < other.get_k()
272
- downsampling_merge(*this, other);
256
+ downsampling_merge(*this, std::forward<FwdSk>(other));
273
257
  }
274
258
  } else {
275
259
  // exact or empty
276
- quantiles_sketch sk_copy(other);
260
+ quantiles_sketch sk_copy(std::forward<FwdSk>(other));
277
261
  if (k_ <= other.get_k()) {
278
262
  if (!is_empty()) {
279
263
  for (uint16_t i = 0; i < base_buffer_.size(); ++i) {
@@ -281,9 +265,9 @@ void quantiles_sketch<T, C, A>::merge(FwdSk&& other) {
281
265
  }
282
266
  }
283
267
  } else { // k_ > other.get_k()
284
- downsampling_merge(sk_copy, *this);
268
+ downsampling_merge(sk_copy, std::move(*this));
285
269
  }
286
- *this = sk_copy;
270
+ *this = std::move(sk_copy);
287
271
  }
288
272
  reset_sorted_view();
289
273
  }
@@ -317,8 +301,8 @@ void quantiles_sketch<T, C, A>::serialize(std::ostream& os, const SerDe& serde)
317
301
  write(os, n_);
318
302
 
319
303
  // min and max
320
- serde.serialize(os, min_item_, 1);
321
- serde.serialize(os, max_item_, 1);
304
+ serde.serialize(os, &*min_item_, 1);
305
+ serde.serialize(os, &*max_item_, 1);
322
306
 
323
307
  // base buffer items
324
308
  serde.serialize(os, base_buffer_.data(), static_cast<unsigned>(base_buffer_.size()));
@@ -365,8 +349,8 @@ auto quantiles_sketch<T, C, A>::serialize(unsigned header_size_bytes, const SerD
365
349
  ptr += copy_to_mem(n_, ptr);
366
350
 
367
351
  // min and max
368
- ptr += serde.serialize(ptr, end_ptr - ptr, min_item_, 1);
369
- ptr += serde.serialize(ptr, end_ptr - ptr, max_item_, 1);
352
+ ptr += serde.serialize(ptr, end_ptr - ptr, &*min_item_, 1);
353
+ ptr += serde.serialize(ptr, end_ptr - ptr, &*max_item_, 1);
370
354
 
371
355
  // base buffer items
372
356
  if (base_buffer_.size() > 0)
@@ -409,19 +393,18 @@ auto quantiles_sketch<T, C, A>::deserialize(std::istream &is, const SerDe& serde
409
393
  const bool is_compact = (serial_version == 2) | ((flags_byte & (1 << flags::IS_COMPACT)) > 0);
410
394
  const bool is_sorted = (flags_byte & (1 << flags::IS_SORTED)) > 0;
411
395
 
412
- A alloc(allocator);
413
- auto item_buffer_deleter = [&alloc](T* ptr) { alloc.deallocate(ptr, 1); };
414
- std::unique_ptr<T, decltype(item_buffer_deleter)> min_item_buffer(alloc.allocate(1), item_buffer_deleter);
415
- std::unique_ptr<T, decltype(item_buffer_deleter)> max_item_buffer(alloc.allocate(1), item_buffer_deleter);
416
- std::unique_ptr<T, item_deleter> min_item(nullptr, item_deleter(allocator));
417
- std::unique_ptr<T, item_deleter> max_item(nullptr, item_deleter(allocator));
418
-
419
- serde.deserialize(is, min_item_buffer.get(), 1);
420
- // serde call did not throw, repackage with destrtuctor
421
- min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
422
- serde.deserialize(is, max_item_buffer.get(), 1);
423
- // serde call did not throw, repackage with destrtuctor
424
- max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
396
+ optional<T> tmp; // space to deserialize min and max
397
+ optional<T> min_item;
398
+ optional<T> max_item;
399
+
400
+ serde.deserialize(is, &*tmp, 1);
401
+ // serde call did not throw, repackage and cleanup
402
+ min_item.emplace(*tmp);
403
+ (*tmp).~T();
404
+ serde.deserialize(is, &*tmp, 1);
405
+ // serde call did not throw, repackage and cleanup
406
+ max_item.emplace(*tmp);
407
+ (*tmp).~T();
425
408
 
426
409
  if (serial_version == 1) {
427
410
  read<uint64_t>(is); // no longer used
@@ -477,7 +460,7 @@ auto quantiles_sketch<T, C, A>::deserialize_array(std::istream& is, uint32_t num
477
460
  items.get_deleter().set_destroy(true);
478
461
  if (!is.good()) throw std::runtime_error("error reading from std::istream");
479
462
 
480
- // succesfully read, now put into a Level
463
+ // successfully read, now put into a Level
481
464
  Level level(allocator);
482
465
  level.reserve(capacity);
483
466
  level.insert(level.begin(),
@@ -524,19 +507,18 @@ auto quantiles_sketch<T, C, A>::deserialize(const void* bytes, size_t size, cons
524
507
  const bool is_compact = (serial_version == 2) | ((flags_byte & (1 << flags::IS_COMPACT)) > 0);
525
508
  const bool is_sorted = (flags_byte & (1 << flags::IS_SORTED)) > 0;
526
509
 
527
- A alloc(allocator);
528
- auto item_buffer_deleter = [&alloc](T* ptr) { alloc.deallocate(ptr, 1); };
529
- std::unique_ptr<T, decltype(item_buffer_deleter)> min_item_buffer(alloc.allocate(1), item_buffer_deleter);
530
- std::unique_ptr<T, decltype(item_buffer_deleter)> max_item_buffer(alloc.allocate(1), item_buffer_deleter);
531
- std::unique_ptr<T, item_deleter> min_item(nullptr, item_deleter(allocator));
532
- std::unique_ptr<T, item_deleter> max_item(nullptr, item_deleter(allocator));
533
-
534
- ptr += serde.deserialize(ptr, end_ptr - ptr, min_item_buffer.get(), 1);
535
- // serde call did not throw, repackage with destrtuctor
536
- min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
537
- ptr += serde.deserialize(ptr, end_ptr - ptr, max_item_buffer.get(), 1);
538
- // serde call did not throw, repackage with destrtuctor
539
- max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
510
+ optional<T> tmp; // space to deserialize min and max
511
+ optional<T> min_item;
512
+ optional<T> max_item;
513
+
514
+ ptr += serde.deserialize(ptr, end_ptr - ptr, &*tmp, 1);
515
+ // serde call did not throw, repackage and cleanup
516
+ min_item.emplace(*tmp);
517
+ (*tmp).~T();
518
+ ptr += serde.deserialize(ptr, end_ptr - ptr, &*tmp, 1);
519
+ // serde call did not throw, repackage and cleanup
520
+ max_item.emplace(*tmp);
521
+ (*tmp).~T();
540
522
 
541
523
  if (serial_version == 1) {
542
524
  uint64_t unused_long;
@@ -645,12 +627,12 @@ string<A> quantiles_sketch<T, C, A>::to_string(bool print_levels, bool print_ite
645
627
  uint8_t level = 0;
646
628
  os << " BB:" << std::endl;
647
629
  for (const T& item : base_buffer_) {
648
- os << " " << std::to_string(item) << std::endl;
630
+ os << " " << item << std::endl;
649
631
  }
650
632
  for (uint8_t i = 0; i < levels_.size(); ++i) {
651
633
  os << " level " << static_cast<unsigned int>(level) << ":" << std::endl;
652
634
  for (const T& item : levels_[i]) {
653
- os << " " << std::to_string(item) << std::endl;
635
+ os << " " << item << std::endl;
654
636
  }
655
637
  }
656
638
  os << "### End sketch data" << std::endl;
@@ -769,42 +751,6 @@ auto quantiles_sketch<T, C, A>::get_quantile(double rank, bool inclusive) const
769
751
  return sorted_view_->get_quantile(rank, inclusive);
770
752
  }
771
753
 
772
- template<typename T, typename C, typename A>
773
- std::vector<T, A> quantiles_sketch<T, C, A>::get_quantiles(const double* ranks, uint32_t size, bool inclusive) const {
774
- if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
775
- std::vector<T, A> quantiles(allocator_);
776
- quantiles.reserve(size);
777
-
778
- // possible side-effect: sorting base buffer
779
- setup_sorted_view();
780
-
781
- for (uint32_t i = 0; i < size; ++i) {
782
- const double rank = ranks[i];
783
- if ((rank < 0.0) || (rank > 1.0)) {
784
- throw std::invalid_argument("Normalized rank cannot be less than 0 or greater than 1");
785
- }
786
- quantiles.push_back(sorted_view_->get_quantile(rank, inclusive));
787
- }
788
- return quantiles;
789
- }
790
-
791
- template<typename T, typename C, typename A>
792
- std::vector<T, A> quantiles_sketch<T, C, A>::get_quantiles(uint32_t num, bool inclusive) const {
793
- if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
794
- if (num == 0) {
795
- throw std::invalid_argument("num must be > 0");
796
- }
797
- vector_double ranks(num, 0, allocator_);
798
- ranks[0] = 0.0;
799
- for (size_t i = 1; i < num; i++) {
800
- ranks[i] = static_cast<double>(i) / (num - 1);
801
- }
802
- if (num > 1) {
803
- ranks[num - 1] = 1.0;
804
- }
805
- return get_quantiles(ranks.data(), num, inclusive);
806
- }
807
-
808
754
  template<typename T, typename C, typename A>
809
755
  double quantiles_sketch<T, C, A>::get_rank(const T& item, bool inclusive) const {
810
756
  if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
@@ -1012,7 +958,7 @@ void quantiles_sketch<T, C, A>::zip_buffer(Level& buf_in, Level& buf_out) {
1012
958
  uint32_t rand_offset = next_offset;
1013
959
  next_offset = 1 - next_offset;
1014
960
  #else
1015
- uint32_t rand_offset = random_bit();
961
+ uint32_t rand_offset = random_utils::random_bit();
1016
962
  #endif
1017
963
  if ((buf_in.size() != 2 * buf_out.capacity())
1018
964
  || (buf_out.size() > 0)) {
@@ -1127,15 +1073,14 @@ void quantiles_sketch<T, C, A>::standard_merge(quantiles_sketch& tgt, FwdSk&& sr
1127
1073
  // update min and max items
1128
1074
  // can't just check is_empty() since min and max might not have been set if
1129
1075
  // there were no base buffer items added via update()
1130
- if (tgt.min_item_ == nullptr) {
1131
- tgt.min_item_ = new (tgt.allocator_.allocate(1)) T(*src.min_item_);
1076
+ if (!tgt.min_item_) {
1077
+ tgt.min_item_.emplace(conditional_forward<FwdSk>(*src.min_item_));
1132
1078
  } else {
1133
1079
  if (tgt.comparator_(*src.min_item_, *tgt.min_item_))
1134
1080
  *tgt.min_item_ = conditional_forward<FwdSk>(*src.min_item_);
1135
1081
  }
1136
-
1137
- if (tgt.max_item_ == nullptr) {
1138
- tgt.max_item_ = new (tgt.allocator_.allocate(1)) T(*src.max_item_);
1082
+ if (!tgt.max_item_) {
1083
+ tgt.max_item_.emplace(conditional_forward<FwdSk>(*src.max_item_));
1139
1084
  } else {
1140
1085
  if (tgt.comparator_(*tgt.max_item_, *src.max_item_))
1141
1086
  *tgt.max_item_ = conditional_forward<FwdSk>(*src.max_item_);
@@ -1203,15 +1148,14 @@ void quantiles_sketch<T, C, A>::downsampling_merge(quantiles_sketch& tgt, FwdSk&
1203
1148
  // update min and max items
1204
1149
  // can't just check is_empty() since min and max might not have been set if
1205
1150
  // there were no base buffer items added via update()
1206
- if (tgt.min_item_ == nullptr) {
1207
- tgt.min_item_ = new (tgt.allocator_.allocate(1)) T(*src.min_item_);
1151
+ if (!tgt.min_item_) {
1152
+ tgt.min_item_.emplace(conditional_forward<FwdSk>(*src.min_item_));
1208
1153
  } else {
1209
1154
  if (tgt.comparator_(*src.min_item_, *tgt.min_item_))
1210
1155
  *tgt.min_item_ = conditional_forward<FwdSk>(*src.min_item_);
1211
1156
  }
1212
-
1213
- if (tgt.max_item_ == nullptr) {
1214
- tgt.max_item_ = new (tgt.allocator_.allocate(1)) T(*src.max_item_);
1157
+ if (!tgt.max_item_) {
1158
+ tgt.max_item_.emplace(conditional_forward<FwdSk>(*src.max_item_));
1215
1159
  } else {
1216
1160
  if (tgt.comparator_(*tgt.max_item_, *src.max_item_))
1217
1161
  *tgt.max_item_ = conditional_forward<FwdSk>(*src.max_item_);
@@ -1230,20 +1174,6 @@ uint8_t quantiles_sketch<T, C, A>::lowest_zero_bit_starting_at(uint64_t bits, ui
1230
1174
  return pos;
1231
1175
  }
1232
1176
 
1233
- template<typename T, typename C, typename A>
1234
- class quantiles_sketch<T, C, A>::item_deleter {
1235
- public:
1236
- item_deleter(const A& allocator): allocator_(allocator) {}
1237
- void operator() (T* ptr) {
1238
- if (ptr != nullptr) {
1239
- ptr->~T();
1240
- allocator_.deallocate(ptr, 1);
1241
- }
1242
- }
1243
- private:
1244
- A allocator_;
1245
- };
1246
-
1247
1177
  template<typename T, typename C, typename A>
1248
1178
  class quantiles_sketch<T, C, A>::items_deleter {
1249
1179
  public:
@@ -1354,12 +1284,12 @@ bool quantiles_sketch<T, C, A>::const_iterator::operator!=(const const_iterator&
1354
1284
  }
1355
1285
 
1356
1286
  template<typename T, typename C, typename A>
1357
- auto quantiles_sketch<T, C, A>::const_iterator::operator*() const -> const value_type {
1287
+ auto quantiles_sketch<T, C, A>::const_iterator::operator*() const -> reference {
1358
1288
  return value_type(level_ == -1 ? base_buffer_[index_] : levels_[level_][index_], weight_);
1359
1289
  }
1360
1290
 
1361
1291
  template<typename T, typename C, typename A>
1362
- auto quantiles_sketch<T, C, A>::const_iterator::operator->() const -> const return_value_holder<value_type> {
1292
+ auto quantiles_sketch<T, C, A>::const_iterator::operator->() const -> pointer {
1363
1293
  return **this;
1364
1294
  }
1365
1295
 
@@ -20,7 +20,6 @@ add_executable(quantiles_test)
20
20
  target_link_libraries(quantiles_test quantiles common common_test_lib)
21
21
 
22
22
  set_target_properties(quantiles_test PROPERTIES
23
- CXX_STANDARD 11
24
23
  CXX_STANDARD_REQUIRED YES
25
24
  )
26
25
 
@@ -42,3 +41,17 @@ target_sources(quantiles_test
42
41
  quantiles_compatibility_test.cpp
43
42
  kolmogorov_smirnov_test.cpp
44
43
  )
44
+
45
+ if (SERDE_COMPAT)
46
+ target_sources(quantiles_test
47
+ PRIVATE
48
+ quantiles_sketch_deserialize_from_java_test.cpp
49
+ )
50
+ endif()
51
+
52
+ if (GENERATE)
53
+ target_sources(quantiles_test
54
+ PRIVATE
55
+ quantiles_sketch_serialize_for_java.cpp
56
+ )
57
+ endif()