datasketches 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +6 -0
  3. data/NOTICE +1 -1
  4. data/README.md +0 -2
  5. data/ext/datasketches/cpc_wrapper.cpp +2 -2
  6. data/ext/datasketches/kll_wrapper.cpp +0 -10
  7. data/lib/datasketches/version.rb +1 -1
  8. data/lib/datasketches.rb +1 -1
  9. data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
  10. data/vendor/datasketches-cpp/CODE_OF_CONDUCT.md +3 -0
  11. data/vendor/datasketches-cpp/CONTRIBUTING.md +50 -0
  12. data/vendor/datasketches-cpp/Doxyfile +2827 -0
  13. data/vendor/datasketches-cpp/LICENSE +0 -76
  14. data/vendor/datasketches-cpp/NOTICE +1 -1
  15. data/vendor/datasketches-cpp/README.md +1 -3
  16. data/vendor/datasketches-cpp/common/CMakeLists.txt +12 -11
  17. data/vendor/datasketches-cpp/common/include/common_defs.hpp +11 -8
  18. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +0 -2
  19. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov.hpp +9 -6
  20. data/vendor/datasketches-cpp/common/include/optional.hpp +148 -0
  21. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view.hpp +95 -2
  22. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +1 -1
  23. data/vendor/datasketches-cpp/common/include/serde.hpp +69 -20
  24. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +1 -1
  25. data/vendor/datasketches-cpp/common/test/optional_test.cpp +85 -0
  26. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +14 -14
  27. data/vendor/datasketches-cpp/count/include/count_min.hpp +132 -78
  28. data/vendor/datasketches-cpp/count/include/count_min_impl.hpp +132 -152
  29. data/vendor/datasketches-cpp/count/test/CMakeLists.txt +11 -12
  30. data/vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp +61 -61
  31. data/vendor/datasketches-cpp/count/test/count_min_test.cpp +175 -178
  32. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +14 -20
  33. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +7 -4
  34. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +17 -17
  35. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +40 -40
  36. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +13 -10
  37. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +35 -11
  38. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +8 -8
  39. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -2
  40. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +5 -5
  41. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +20 -7
  42. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_deserialize_from_java_test.cpp +60 -0
  43. data/vendor/datasketches-cpp/{python/include/py_object_lt.hpp → cpc/test/cpc_sketch_serialize_for_java.cpp} +15 -14
  44. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +4 -29
  45. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +4 -4
  46. data/vendor/datasketches-cpp/density/include/density_sketch.hpp +29 -9
  47. data/vendor/datasketches-cpp/density/include/density_sketch_impl.hpp +1 -1
  48. data/vendor/datasketches-cpp/density/test/CMakeLists.txt +0 -1
  49. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +21 -9
  50. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +6 -4
  51. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +14 -1
  52. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_deserialize_from_java_test.cpp +95 -0
  53. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_serialize_for_java.cpp +83 -0
  54. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +3 -42
  55. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +2 -2
  56. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +3 -1
  57. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +3 -3
  58. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +5 -3
  59. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +4 -4
  60. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +3 -1
  61. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +0 -12
  62. data/vendor/datasketches-cpp/hll/include/hll.hpp +70 -57
  63. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +14 -1
  64. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +0 -68
  65. data/vendor/datasketches-cpp/hll/test/hll_sketch_deserialize_from_java_test.cpp +69 -0
  66. data/vendor/datasketches-cpp/hll/test/hll_sketch_serialize_for_java.cpp +52 -0
  67. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +2 -2
  68. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +71 -50
  69. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +59 -130
  70. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +14 -1
  71. data/vendor/datasketches-cpp/kll/test/kll_sketch_deserialize_from_java_test.cpp +103 -0
  72. data/vendor/datasketches-cpp/kll/test/kll_sketch_serialize_for_java.cpp +62 -0
  73. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +3 -38
  74. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +68 -51
  75. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +62 -132
  76. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +14 -1
  77. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_deserialize_from_java_test.cpp +84 -0
  78. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_serialize_for_java.cpp +52 -0
  79. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +14 -38
  80. data/vendor/datasketches-cpp/req/include/req_common.hpp +7 -3
  81. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +2 -2
  82. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +97 -23
  83. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +48 -109
  84. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +14 -1
  85. data/vendor/datasketches-cpp/req/test/req_sketch_deserialize_from_java_test.cpp +55 -0
  86. data/vendor/datasketches-cpp/{tuple/include/array_of_doubles_intersection_impl.hpp → req/test/req_sketch_serialize_for_java.cpp} +12 -7
  87. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +3 -89
  88. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +4 -0
  89. data/vendor/datasketches-cpp/sampling/include/ebpps_sample.hpp +210 -0
  90. data/vendor/datasketches-cpp/sampling/include/ebpps_sample_impl.hpp +535 -0
  91. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch.hpp +281 -0
  92. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch_impl.hpp +531 -0
  93. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +69 -26
  94. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +3 -3
  95. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +10 -11
  96. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +4 -4
  97. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +55 -8
  98. data/vendor/datasketches-cpp/sampling/test/ebpps_allocation_test.cpp +96 -0
  99. data/vendor/datasketches-cpp/sampling/test/ebpps_sample_test.cpp +137 -0
  100. data/vendor/datasketches-cpp/sampling/test/ebpps_sketch_test.cpp +266 -0
  101. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_deserialize_from_java_test.cpp +81 -0
  102. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_serialize_for_java.cpp +54 -0
  103. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +0 -37
  104. data/vendor/datasketches-cpp/sampling/test/var_opt_union_deserialize_from_java_test.cpp +50 -0
  105. data/vendor/datasketches-cpp/sampling/test/var_opt_union_serialize_for_java.cpp +56 -0
  106. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -18
  107. data/vendor/datasketches-cpp/theta/include/bit_packing.hpp +2608 -2608
  108. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
  109. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_theta_sketched_sets.hpp +7 -6
  110. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +20 -5
  111. data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +10 -4
  112. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +1 -1
  113. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +13 -5
  114. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +5 -5
  115. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +3 -3
  116. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity.hpp +2 -1
  117. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +1 -0
  118. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +1 -1
  119. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +126 -27
  120. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +8 -8
  121. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +17 -10
  122. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -1
  123. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +3 -3
  124. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +5 -2
  125. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +11 -1
  126. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +14 -1
  127. data/vendor/datasketches-cpp/theta/test/theta_sketch_deserialize_from_java_test.cpp +57 -0
  128. data/vendor/datasketches-cpp/theta/test/theta_sketch_serialize_for_java.cpp +61 -0
  129. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +0 -188
  130. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +8 -7
  131. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +19 -144
  132. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b.hpp → array_tuple_a_not_b.hpp} +24 -16
  133. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b_impl.hpp → array_tuple_a_not_b_impl.hpp} +4 -4
  134. data/vendor/datasketches-cpp/tuple/include/array_tuple_intersection.hpp +65 -0
  135. data/vendor/datasketches-cpp/{python/include/py_object_ostream.hpp → tuple/include/array_tuple_intersection_impl.hpp} +7 -24
  136. data/vendor/datasketches-cpp/tuple/include/array_tuple_sketch.hpp +237 -0
  137. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_sketch_impl.hpp → array_tuple_sketch_impl.hpp} +40 -41
  138. data/vendor/datasketches-cpp/tuple/include/array_tuple_union.hpp +81 -0
  139. data/vendor/datasketches-cpp/tuple/include/array_tuple_union_impl.hpp +43 -0
  140. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +11 -2
  141. data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +17 -10
  142. data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +2 -1
  143. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +95 -32
  144. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +19 -11
  145. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +16 -1
  146. data/vendor/datasketches-cpp/tuple/test/aod_sketch_deserialize_from_java_test.cpp +76 -0
  147. data/vendor/datasketches-cpp/tuple/test/aod_sketch_serialize_for_java.cpp +62 -0
  148. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +5 -129
  149. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +85 -89
  150. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +3 -1
  151. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_deserialize_from_java_test.cpp +47 -0
  152. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_serialize_for_java.cpp +38 -0
  153. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +1 -1
  154. data/vendor/datasketches-cpp/version.cfg.in +1 -1
  155. metadata +47 -93
  156. data/vendor/datasketches-cpp/MANIFEST.in +0 -39
  157. data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
  158. data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
  159. data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
  160. data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
  161. data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
  162. data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
  163. data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
  164. data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
  165. data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
  166. data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
  167. data/vendor/datasketches-cpp/pyproject.toml +0 -23
  168. data/vendor/datasketches-cpp/python/CMakeLists.txt +0 -87
  169. data/vendor/datasketches-cpp/python/README.md +0 -85
  170. data/vendor/datasketches-cpp/python/datasketches/DensityWrapper.py +0 -87
  171. data/vendor/datasketches-cpp/python/datasketches/KernelFunction.py +0 -35
  172. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +0 -110
  173. data/vendor/datasketches-cpp/python/datasketches/TuplePolicy.py +0 -77
  174. data/vendor/datasketches-cpp/python/datasketches/TupleWrapper.py +0 -205
  175. data/vendor/datasketches-cpp/python/datasketches/__init__.py +0 -38
  176. data/vendor/datasketches-cpp/python/include/kernel_function.hpp +0 -98
  177. data/vendor/datasketches-cpp/python/include/py_serde.hpp +0 -113
  178. data/vendor/datasketches-cpp/python/include/quantile_conditional.hpp +0 -104
  179. data/vendor/datasketches-cpp/python/include/tuple_policy.hpp +0 -136
  180. data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +0 -345
  181. data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +0 -354
  182. data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +0 -346
  183. data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +0 -463
  184. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +0 -403
  185. data/vendor/datasketches-cpp/python/pybind11Path.cmd +0 -21
  186. data/vendor/datasketches-cpp/python/src/__init__.py +0 -18
  187. data/vendor/datasketches-cpp/python/src/count_wrapper.cpp +0 -101
  188. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +0 -76
  189. data/vendor/datasketches-cpp/python/src/datasketches.cpp +0 -58
  190. data/vendor/datasketches-cpp/python/src/density_wrapper.cpp +0 -95
  191. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +0 -182
  192. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -126
  193. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +0 -158
  194. data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +0 -68
  195. data/vendor/datasketches-cpp/python/src/py_serde.cpp +0 -112
  196. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +0 -155
  197. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +0 -154
  198. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +0 -166
  199. data/vendor/datasketches-cpp/python/src/tuple_wrapper.cpp +0 -215
  200. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +0 -490
  201. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +0 -173
  202. data/vendor/datasketches-cpp/python/tests/__init__.py +0 -16
  203. data/vendor/datasketches-cpp/python/tests/count_min_test.py +0 -86
  204. data/vendor/datasketches-cpp/python/tests/cpc_test.py +0 -64
  205. data/vendor/datasketches-cpp/python/tests/density_test.py +0 -93
  206. data/vendor/datasketches-cpp/python/tests/fi_test.py +0 -149
  207. data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -129
  208. data/vendor/datasketches-cpp/python/tests/kll_test.py +0 -159
  209. data/vendor/datasketches-cpp/python/tests/quantiles_test.py +0 -160
  210. data/vendor/datasketches-cpp/python/tests/req_test.py +0 -159
  211. data/vendor/datasketches-cpp/python/tests/theta_test.py +0 -148
  212. data/vendor/datasketches-cpp/python/tests/tuple_test.py +0 -206
  213. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +0 -148
  214. data/vendor/datasketches-cpp/python/tests/vo_test.py +0 -132
  215. data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
  216. data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
  217. data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
  218. data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
  219. data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
  220. data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +0 -67
  221. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
  222. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
  223. data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
  224. data/vendor/datasketches-cpp/setup.py +0 -110
  225. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
  226. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
  227. data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
  228. data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
  229. data/vendor/datasketches-cpp/tox.ini +0 -26
  230. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +0 -52
  231. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +0 -81
  232. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +0 -43
  233. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +0 -1
  234. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
  235. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
  236. data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
  237. data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +0 -1
@@ -27,9 +27,20 @@
27
27
  #include "quantiles_sorted_view.hpp"
28
28
  #include "common_defs.hpp"
29
29
  #include "serde.hpp"
30
+ #include "optional.hpp"
30
31
 
31
32
  namespace datasketches {
32
33
 
34
+ /// Constants for Quantiles sketch
35
+ namespace quantiles_constants {
36
+ /// default value of parameter K
37
+ const uint16_t DEFAULT_K = 128;
38
+ /// minimum value of parameter K
39
+ const uint16_t MIN_K = 2;
40
+ /// maximum value of parameter K
41
+ const uint16_t MAX_K = 1 << 15;
42
+ }
43
+
33
44
  /**
34
45
  * This is a stochastic streaming sketch that enables near-real time analysis of the
35
46
  * approximate distribution from a very large stream in a single pass.
@@ -136,13 +147,6 @@ Table Guide for DoublesSketch Size in Bytes and Approximate Error:
136
147
  * @author Alexander Saydakov
137
148
  * @author Jon Malkin
138
149
  */
139
-
140
- namespace quantiles_constants {
141
- const uint16_t DEFAULT_K = 128;
142
- const uint16_t MIN_K = 2;
143
- const uint16_t MAX_K = 1 << 15;
144
- }
145
-
146
150
  template <typename T,
147
151
  typename Comparator = std::less<T>, // strict weak ordering function (see C++ named requirements: Compare)
148
152
  typename Allocator = std::allocator<T>>
@@ -151,13 +155,43 @@ public:
151
155
  using value_type = T;
152
156
  using allocator_type = Allocator;
153
157
  using comparator = Comparator;
158
+ using quantile_return_type = typename quantiles_sorted_view<T, Comparator, Allocator>::quantile_return_type;
159
+ using vector_double = typename quantiles_sorted_view<T, Comparator, Allocator>::vector_double;
154
160
 
161
+ /**
162
+ * Constructor
163
+ * @param k affects the size of the sketch and its estimation error
164
+ * @param comparator strict weak ordering function (see C++ named requirements: Compare)
165
+ * @param allocator used to allocate memory
166
+ */
155
167
  explicit quantiles_sketch(uint16_t k = quantiles_constants::DEFAULT_K,
156
168
  const Comparator& comparator = Comparator(), const Allocator& allocator = Allocator());
169
+
170
+ /**
171
+ * Copy constructor
172
+ * @param other sketch to be copied
173
+ */
157
174
  quantiles_sketch(const quantiles_sketch& other);
175
+
176
+ /** Move constructor
177
+ * @param other sketch to be moved
178
+ */
158
179
  quantiles_sketch(quantiles_sketch&& other) noexcept;
180
+
159
181
  ~quantiles_sketch();
182
+
183
+ /**
184
+ * Copy assignment
185
+ * @param other sketch to be copied
186
+ * @return reference to this sketch
187
+ */
160
188
  quantiles_sketch& operator=(const quantiles_sketch& other);
189
+
190
+ /**
191
+ * Move assignment
192
+ * @param other sketch to be moved
193
+ * @return reference to this sketch
194
+ */
161
195
  quantiles_sketch& operator=(quantiles_sketch&& other) noexcept;
162
196
 
163
197
  /**
@@ -247,48 +281,13 @@ public:
247
281
  * If the sketch is empty this throws std::runtime_error.
248
282
  *
249
283
  * @param rank the specified normalized rank in the hypothetical sorted stream.
250
- *
284
+ * @param inclusive if true the weight of the given item is included into the rank.
285
+ * Otherwise the rank equals the sum of the weights of all items that are less than the given item
286
+ * according to the Comparator.
251
287
  * @return the approximation to the item at the given rank
252
288
  */
253
- using quantile_return_type = typename quantiles_sorted_view<T, Comparator, Allocator>::quantile_return_type;
254
289
  quantile_return_type get_quantile(double rank, bool inclusive = true) const;
255
290
 
256
- /**
257
- * This is a multiple-query version of get_quantile().
258
- * <p>
259
- * This returns an array that could have been generated by using get_quantile() for each
260
- * normalized rank separately.
261
- *
262
- * <p>If the sketch is empty this throws std::runtime_error.
263
- *
264
- * @param ranks given array of normalized ranks in the hypothetical sorted stream.
265
- * These ranks must be in the interval [0.0, 1.0], inclusive.
266
- * @param size the number of ranks in the array
267
- *
268
- * @return array of approximations to items associated with given ranks in the same order as given ranks
269
- * in the input array.
270
- *
271
- * Deprecated. Will be removed in the next major version. Use get_quantile() instead.
272
- */
273
- std::vector<T, Allocator> get_quantiles(const double* ranks, uint32_t size, bool inclusive = true) const;
274
-
275
- /**
276
- * This is a multiple-query version of get_quantile() that allows the caller to
277
- * specify the number of evenly-spaced normalized ranks.
278
- *
279
- * <p>If the sketch is empty this throws std::runtime_error.
280
- *
281
- * @param num an integer that specifies the number of evenly-spaced ranks.
282
- * This must be an integer greater than 0. A value of 1 is equivalent to get_quantiles([0]).
283
- * A value of 2 is equivalent to get_quantiles([0, 1]). A value of 3 is equivalent to
284
- * get_quantiles([0, 0.5, 1]), etc.
285
- *
286
- * @return array of approximations to items associated with the given number of evenly-spaced normalized ranks.
287
- *
288
- * Deprecated. Will be removed in the next major version. Use get_quantile() instead.
289
- */
290
- std::vector<T, Allocator> get_quantiles(uint32_t num, bool inclusive = true) const;
291
-
292
291
  /**
293
292
  * Returns an approximation to the normalized rank of the given item from 0 to 1, inclusive.
294
293
  *
@@ -300,7 +299,7 @@ public:
300
299
  * @param item to be ranked
301
300
  * @param inclusive if true the weight of the given item is included into the rank.
302
301
  * Otherwise the rank equals the sum of the weights of all items that are less than the given item
303
- * according to the comparator C.
302
+ * according to the Comparator.
304
303
  * @return an approximate normalized rank of the given item
305
304
  */
306
305
  double get_rank(const T& item, bool inclusive = true) const;
@@ -327,7 +326,6 @@ public:
327
326
  * @return an array of m+1 doubles each of which is an approximation
328
327
  * to the fraction of the input stream items (the mass) that fall into one of those intervals.
329
328
  */
330
- using vector_double = typename quantiles_sorted_view<T, Comparator, Allocator>::vector_double;
331
329
  vector_double get_PMF(const T* split_points, uint32_t size, bool inclusive = true) const;
332
330
 
333
331
  /**
@@ -451,9 +449,26 @@ public:
451
449
  string<Allocator> to_string(bool print_levels = false, bool print_items = false) const;
452
450
 
453
451
  class const_iterator;
452
+
453
+ /**
454
+ * Iterator pointing to the first item in the sketch.
455
+ * If the sketch is empty, the returned iterator must not be dereferenced or incremented.
456
+ * @return iterator pointing to the first item in the sketch
457
+ */
454
458
  const_iterator begin() const;
459
+
460
+ /**
461
+ * Iterator pointing to the past-the-end item in the sketch.
462
+ * The past-the-end item is the hypothetical item that would follow the last item.
463
+ * It does not point to any item, and must not be dereferenced or incremented.
464
+ * @return iterator pointing to the past-the-end item in the sketch
465
+ */
455
466
  const_iterator end() const;
456
467
 
468
+ /**
469
+ * Gets the sorted view of this sketch
470
+ * @return the sorted view of this sketch
471
+ */
457
472
  quantiles_sorted_view<T, Comparator, Allocator> get_sorted_view() const;
458
473
 
459
474
  private:
@@ -493,19 +508,18 @@ private:
493
508
  uint64_t bit_pattern_;
494
509
  Level base_buffer_;
495
510
  VectorLevels levels_;
496
- T* min_item_;
497
- T* max_item_;
511
+ optional<T> min_item_;
512
+ optional<T> max_item_;
498
513
  mutable quantiles_sorted_view<T, Comparator, Allocator>* sorted_view_;
499
514
 
500
515
  void setup_sorted_view() const; // modifies mutable state
501
516
  void reset_sorted_view();
502
517
 
503
518
  // for deserialization
504
- class item_deleter;
505
519
  class items_deleter;
506
520
  quantiles_sketch(uint16_t k, uint64_t n, uint64_t bit_pattern,
507
521
  Level&& base_buffer, VectorLevels&& levels,
508
- std::unique_ptr<T, item_deleter> min_item, std::unique_ptr<T, item_deleter> max_item,
522
+ optional<T>&& min_item, optional<T>&& max_item,
509
523
  bool is_sorted, const Comparator& comparator = Comparator(), const Allocator& allocator = Allocator());
510
524
 
511
525
  void grow_base_buffer();
@@ -576,6 +590,9 @@ private:
576
590
  static inline bool check_update_item(TT) {
577
591
  return true;
578
592
  }
593
+
594
+ // for type converting constructor
595
+ template<typename From, typename FC, typename FA> friend class quantiles_sketch;
579
596
  };
580
597
 
581
598
 
@@ -41,8 +41,8 @@ n_(0),
41
41
  bit_pattern_(0),
42
42
  base_buffer_(allocator_),
43
43
  levels_(allocator_),
44
- min_item_(nullptr),
45
- max_item_(nullptr),
44
+ min_item_(),
45
+ max_item_(),
46
46
  sorted_view_(nullptr)
47
47
  {
48
48
  check_k(k_);
@@ -59,12 +59,10 @@ n_(other.n_),
59
59
  bit_pattern_(other.bit_pattern_),
60
60
  base_buffer_(other.base_buffer_),
61
61
  levels_(other.levels_),
62
- min_item_(nullptr),
63
- max_item_(nullptr),
62
+ min_item_(other.min_item_),
63
+ max_item_(other.max_item_),
64
64
  sorted_view_(nullptr)
65
65
  {
66
- if (other.min_item_ != nullptr) min_item_ = new (allocator_.allocate(1)) T(*other.min_item_);
67
- if (other.max_item_ != nullptr) max_item_ = new (allocator_.allocate(1)) T(*other.max_item_);
68
66
  for (size_t i = 0; i < levels_.size(); ++i) {
69
67
  if (levels_[i].capacity() != other.levels_[i].capacity()) {
70
68
  levels_[i].reserve(other.levels_[i].capacity());
@@ -82,13 +80,10 @@ n_(other.n_),
82
80
  bit_pattern_(other.bit_pattern_),
83
81
  base_buffer_(std::move(other.base_buffer_)),
84
82
  levels_(std::move(other.levels_)),
85
- min_item_(other.min_item_),
86
- max_item_(other.max_item_),
83
+ min_item_(std::move(other.min_item_)),
84
+ max_item_(std::move(other.max_item_)),
87
85
  sorted_view_(nullptr)
88
- {
89
- other.min_item_ = nullptr;
90
- other.max_item_ = nullptr;
91
- }
86
+ {}
92
87
 
93
88
  template<typename T, typename C, typename A>
94
89
  quantiles_sketch<T, C, A>& quantiles_sketch<T, C, A>::operator=(const quantiles_sketch& other) {
@@ -126,7 +121,7 @@ quantiles_sketch<T, C, A>& quantiles_sketch<T, C, A>::operator=(quantiles_sketch
126
121
  template<typename T, typename C, typename A>
127
122
  quantiles_sketch<T, C, A>::quantiles_sketch(uint16_t k, uint64_t n, uint64_t bit_pattern,
128
123
  Level&& base_buffer, VectorLevels&& levels,
129
- std::unique_ptr<T, item_deleter> min_item, std::unique_ptr<T, item_deleter> max_item,
124
+ optional<T>&& min_item, optional<T>&& max_item,
130
125
  bool is_sorted, const C& comparator, const A& allocator):
131
126
  comparator_(comparator),
132
127
  allocator_(allocator),
@@ -136,13 +131,13 @@ n_(n),
136
131
  bit_pattern_(bit_pattern),
137
132
  base_buffer_(std::move(base_buffer)),
138
133
  levels_(std::move(levels)),
139
- min_item_(min_item.release()),
140
- max_item_(max_item.release()),
134
+ min_item_(std::move(min_item)),
135
+ max_item_(std::move(max_item)),
141
136
  sorted_view_(nullptr)
142
137
  {
143
- uint32_t item_count = base_buffer_.size();
138
+ uint32_t item_count = static_cast<uint32_t>(base_buffer_.size());
144
139
  for (Level& lvl : levels_) {
145
- item_count += lvl.size();
140
+ item_count += static_cast<uint32_t>(lvl.size());
146
141
  }
147
142
  if (item_count != compute_retained_items(k_, n_))
148
143
  throw std::logic_error("Item count does not match value computed from k, n");
@@ -160,8 +155,8 @@ n_(other.get_n()),
160
155
  bit_pattern_(compute_bit_pattern(other.get_k(), other.get_n())),
161
156
  base_buffer_(allocator),
162
157
  levels_(allocator),
163
- min_item_(nullptr),
164
- max_item_(nullptr),
158
+ min_item_(other.min_item_),
159
+ max_item_(other.max_item_),
165
160
  sorted_view_(nullptr)
166
161
  {
167
162
  static_assert(std::is_constructible<T, From>::value,
@@ -170,9 +165,6 @@ sorted_view_(nullptr)
170
165
  base_buffer_.reserve(2 * std::min(quantiles_constants::MIN_K, k_));
171
166
 
172
167
  if (!other.is_empty()) {
173
- min_item_ = new (allocator_.allocate(1)) T(other.get_min_item());
174
- max_item_ = new (allocator_.allocate(1)) T(other.get_max_item());
175
-
176
168
  // reserve space in levels
177
169
  const uint8_t num_levels = compute_levels_needed(k_, n_);
178
170
  levels_.reserve(num_levels);
@@ -212,14 +204,6 @@ sorted_view_(nullptr)
212
204
 
213
205
  template<typename T, typename C, typename A>
214
206
  quantiles_sketch<T, C, A>::~quantiles_sketch() {
215
- if (min_item_ != nullptr) {
216
- min_item_->~T();
217
- allocator_.deallocate(min_item_, 1);
218
- }
219
- if (max_item_ != nullptr) {
220
- max_item_->~T();
221
- allocator_.deallocate(max_item_, 1);
222
- }
223
207
  reset_sorted_view();
224
208
  }
225
209
 
@@ -228,8 +212,8 @@ template<typename FwdT>
228
212
  void quantiles_sketch<T, C, A>::update(FwdT&& item) {
229
213
  if (!check_update_item(item)) { return; }
230
214
  if (is_empty()) {
231
- min_item_ = new (allocator_.allocate(1)) T(item);
232
- max_item_ = new (allocator_.allocate(1)) T(item);
215
+ min_item_.emplace(item);
216
+ max_item_.emplace(item);
233
217
  } else {
234
218
  if (comparator_(item, *min_item_)) *min_item_ = item;
235
219
  if (comparator_(*max_item_, item)) *max_item_ = item;
@@ -263,17 +247,17 @@ void quantiles_sketch<T, C, A>::merge(FwdSk&& other) {
263
247
  // other has data and is in estimation mode
264
248
  if (is_estimation_mode()) {
265
249
  if (k_ == other.get_k()) {
266
- standard_merge(*this, other);
250
+ standard_merge(*this, std::forward<FwdSk>(other));
267
251
  } else if (k_ > other.get_k()) {
268
- quantiles_sketch sk_copy(other);
269
- downsampling_merge(sk_copy, *this);
270
- *this = sk_copy;
252
+ quantiles_sketch sk_copy(std::forward<FwdSk>(other));
253
+ downsampling_merge(sk_copy, std::move(*this));
254
+ *this = std::move(sk_copy);
271
255
  } else { // k_ < other.get_k()
272
- downsampling_merge(*this, other);
256
+ downsampling_merge(*this, std::forward<FwdSk>(other));
273
257
  }
274
258
  } else {
275
259
  // exact or empty
276
- quantiles_sketch sk_copy(other);
260
+ quantiles_sketch sk_copy(std::forward<FwdSk>(other));
277
261
  if (k_ <= other.get_k()) {
278
262
  if (!is_empty()) {
279
263
  for (uint16_t i = 0; i < base_buffer_.size(); ++i) {
@@ -281,9 +265,9 @@ void quantiles_sketch<T, C, A>::merge(FwdSk&& other) {
281
265
  }
282
266
  }
283
267
  } else { // k_ > other.get_k()
284
- downsampling_merge(sk_copy, *this);
268
+ downsampling_merge(sk_copy, std::move(*this));
285
269
  }
286
- *this = sk_copy;
270
+ *this = std::move(sk_copy);
287
271
  }
288
272
  reset_sorted_view();
289
273
  }
@@ -317,8 +301,8 @@ void quantiles_sketch<T, C, A>::serialize(std::ostream& os, const SerDe& serde)
317
301
  write(os, n_);
318
302
 
319
303
  // min and max
320
- serde.serialize(os, min_item_, 1);
321
- serde.serialize(os, max_item_, 1);
304
+ serde.serialize(os, &*min_item_, 1);
305
+ serde.serialize(os, &*max_item_, 1);
322
306
 
323
307
  // base buffer items
324
308
  serde.serialize(os, base_buffer_.data(), static_cast<unsigned>(base_buffer_.size()));
@@ -365,8 +349,8 @@ auto quantiles_sketch<T, C, A>::serialize(unsigned header_size_bytes, const SerD
365
349
  ptr += copy_to_mem(n_, ptr);
366
350
 
367
351
  // min and max
368
- ptr += serde.serialize(ptr, end_ptr - ptr, min_item_, 1);
369
- ptr += serde.serialize(ptr, end_ptr - ptr, max_item_, 1);
352
+ ptr += serde.serialize(ptr, end_ptr - ptr, &*min_item_, 1);
353
+ ptr += serde.serialize(ptr, end_ptr - ptr, &*max_item_, 1);
370
354
 
371
355
  // base buffer items
372
356
  if (base_buffer_.size() > 0)
@@ -409,19 +393,18 @@ auto quantiles_sketch<T, C, A>::deserialize(std::istream &is, const SerDe& serde
409
393
  const bool is_compact = (serial_version == 2) | ((flags_byte & (1 << flags::IS_COMPACT)) > 0);
410
394
  const bool is_sorted = (flags_byte & (1 << flags::IS_SORTED)) > 0;
411
395
 
412
- A alloc(allocator);
413
- auto item_buffer_deleter = [&alloc](T* ptr) { alloc.deallocate(ptr, 1); };
414
- std::unique_ptr<T, decltype(item_buffer_deleter)> min_item_buffer(alloc.allocate(1), item_buffer_deleter);
415
- std::unique_ptr<T, decltype(item_buffer_deleter)> max_item_buffer(alloc.allocate(1), item_buffer_deleter);
416
- std::unique_ptr<T, item_deleter> min_item(nullptr, item_deleter(allocator));
417
- std::unique_ptr<T, item_deleter> max_item(nullptr, item_deleter(allocator));
418
-
419
- serde.deserialize(is, min_item_buffer.get(), 1);
420
- // serde call did not throw, repackage with destrtuctor
421
- min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
422
- serde.deserialize(is, max_item_buffer.get(), 1);
423
- // serde call did not throw, repackage with destrtuctor
424
- max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
396
+ optional<T> tmp; // space to deserialize min and max
397
+ optional<T> min_item;
398
+ optional<T> max_item;
399
+
400
+ serde.deserialize(is, &*tmp, 1);
401
+ // serde call did not throw, repackage and cleanup
402
+ min_item.emplace(*tmp);
403
+ (*tmp).~T();
404
+ serde.deserialize(is, &*tmp, 1);
405
+ // serde call did not throw, repackage and cleanup
406
+ max_item.emplace(*tmp);
407
+ (*tmp).~T();
425
408
 
426
409
  if (serial_version == 1) {
427
410
  read<uint64_t>(is); // no longer used
@@ -477,7 +460,7 @@ auto quantiles_sketch<T, C, A>::deserialize_array(std::istream& is, uint32_t num
477
460
  items.get_deleter().set_destroy(true);
478
461
  if (!is.good()) throw std::runtime_error("error reading from std::istream");
479
462
 
480
- // succesfully read, now put into a Level
463
+ // successfully read, now put into a Level
481
464
  Level level(allocator);
482
465
  level.reserve(capacity);
483
466
  level.insert(level.begin(),
@@ -524,19 +507,18 @@ auto quantiles_sketch<T, C, A>::deserialize(const void* bytes, size_t size, cons
524
507
  const bool is_compact = (serial_version == 2) | ((flags_byte & (1 << flags::IS_COMPACT)) > 0);
525
508
  const bool is_sorted = (flags_byte & (1 << flags::IS_SORTED)) > 0;
526
509
 
527
- A alloc(allocator);
528
- auto item_buffer_deleter = [&alloc](T* ptr) { alloc.deallocate(ptr, 1); };
529
- std::unique_ptr<T, decltype(item_buffer_deleter)> min_item_buffer(alloc.allocate(1), item_buffer_deleter);
530
- std::unique_ptr<T, decltype(item_buffer_deleter)> max_item_buffer(alloc.allocate(1), item_buffer_deleter);
531
- std::unique_ptr<T, item_deleter> min_item(nullptr, item_deleter(allocator));
532
- std::unique_ptr<T, item_deleter> max_item(nullptr, item_deleter(allocator));
533
-
534
- ptr += serde.deserialize(ptr, end_ptr - ptr, min_item_buffer.get(), 1);
535
- // serde call did not throw, repackage with destrtuctor
536
- min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
537
- ptr += serde.deserialize(ptr, end_ptr - ptr, max_item_buffer.get(), 1);
538
- // serde call did not throw, repackage with destrtuctor
539
- max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
510
+ optional<T> tmp; // space to deserialize min and max
511
+ optional<T> min_item;
512
+ optional<T> max_item;
513
+
514
+ ptr += serde.deserialize(ptr, end_ptr - ptr, &*tmp, 1);
515
+ // serde call did not throw, repackage and cleanup
516
+ min_item.emplace(*tmp);
517
+ (*tmp).~T();
518
+ ptr += serde.deserialize(ptr, end_ptr - ptr, &*tmp, 1);
519
+ // serde call did not throw, repackage and cleanup
520
+ max_item.emplace(*tmp);
521
+ (*tmp).~T();
540
522
 
541
523
  if (serial_version == 1) {
542
524
  uint64_t unused_long;
@@ -769,42 +751,6 @@ auto quantiles_sketch<T, C, A>::get_quantile(double rank, bool inclusive) const
769
751
  return sorted_view_->get_quantile(rank, inclusive);
770
752
  }
771
753
 
772
- template<typename T, typename C, typename A>
773
- std::vector<T, A> quantiles_sketch<T, C, A>::get_quantiles(const double* ranks, uint32_t size, bool inclusive) const {
774
- if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
775
- std::vector<T, A> quantiles(allocator_);
776
- quantiles.reserve(size);
777
-
778
- // possible side-effect: sorting base buffer
779
- setup_sorted_view();
780
-
781
- for (uint32_t i = 0; i < size; ++i) {
782
- const double rank = ranks[i];
783
- if ((rank < 0.0) || (rank > 1.0)) {
784
- throw std::invalid_argument("Normalized rank cannot be less than 0 or greater than 1");
785
- }
786
- quantiles.push_back(sorted_view_->get_quantile(rank, inclusive));
787
- }
788
- return quantiles;
789
- }
790
-
791
- template<typename T, typename C, typename A>
792
- std::vector<T, A> quantiles_sketch<T, C, A>::get_quantiles(uint32_t num, bool inclusive) const {
793
- if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
794
- if (num == 0) {
795
- throw std::invalid_argument("num must be > 0");
796
- }
797
- vector_double ranks(num, 0, allocator_);
798
- ranks[0] = 0.0;
799
- for (size_t i = 1; i < num; i++) {
800
- ranks[i] = static_cast<double>(i) / (num - 1);
801
- }
802
- if (num > 1) {
803
- ranks[num - 1] = 1.0;
804
- }
805
- return get_quantiles(ranks.data(), num, inclusive);
806
- }
807
-
808
754
  template<typename T, typename C, typename A>
809
755
  double quantiles_sketch<T, C, A>::get_rank(const T& item, bool inclusive) const {
810
756
  if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
@@ -1012,7 +958,7 @@ void quantiles_sketch<T, C, A>::zip_buffer(Level& buf_in, Level& buf_out) {
1012
958
  uint32_t rand_offset = next_offset;
1013
959
  next_offset = 1 - next_offset;
1014
960
  #else
1015
- uint32_t rand_offset = random_bit();
961
+ uint32_t rand_offset = random_utils::random_bit();
1016
962
  #endif
1017
963
  if ((buf_in.size() != 2 * buf_out.capacity())
1018
964
  || (buf_out.size() > 0)) {
@@ -1127,15 +1073,14 @@ void quantiles_sketch<T, C, A>::standard_merge(quantiles_sketch& tgt, FwdSk&& sr
1127
1073
  // update min and max items
1128
1074
  // can't just check is_empty() since min and max might not have been set if
1129
1075
  // there were no base buffer items added via update()
1130
- if (tgt.min_item_ == nullptr) {
1131
- tgt.min_item_ = new (tgt.allocator_.allocate(1)) T(*src.min_item_);
1076
+ if (!tgt.min_item_) {
1077
+ tgt.min_item_.emplace(conditional_forward<FwdSk>(*src.min_item_));
1132
1078
  } else {
1133
1079
  if (tgt.comparator_(*src.min_item_, *tgt.min_item_))
1134
1080
  *tgt.min_item_ = conditional_forward<FwdSk>(*src.min_item_);
1135
1081
  }
1136
-
1137
- if (tgt.max_item_ == nullptr) {
1138
- tgt.max_item_ = new (tgt.allocator_.allocate(1)) T(*src.max_item_);
1082
+ if (!tgt.max_item_) {
1083
+ tgt.max_item_.emplace(conditional_forward<FwdSk>(*src.max_item_));
1139
1084
  } else {
1140
1085
  if (tgt.comparator_(*tgt.max_item_, *src.max_item_))
1141
1086
  *tgt.max_item_ = conditional_forward<FwdSk>(*src.max_item_);
@@ -1203,15 +1148,14 @@ void quantiles_sketch<T, C, A>::downsampling_merge(quantiles_sketch& tgt, FwdSk&
1203
1148
  // update min and max items
1204
1149
  // can't just check is_empty() since min and max might not have been set if
1205
1150
  // there were no base buffer items added via update()
1206
- if (tgt.min_item_ == nullptr) {
1207
- tgt.min_item_ = new (tgt.allocator_.allocate(1)) T(*src.min_item_);
1151
+ if (!tgt.min_item_) {
1152
+ tgt.min_item_.emplace(conditional_forward<FwdSk>(*src.min_item_));
1208
1153
  } else {
1209
1154
  if (tgt.comparator_(*src.min_item_, *tgt.min_item_))
1210
1155
  *tgt.min_item_ = conditional_forward<FwdSk>(*src.min_item_);
1211
1156
  }
1212
-
1213
- if (tgt.max_item_ == nullptr) {
1214
- tgt.max_item_ = new (tgt.allocator_.allocate(1)) T(*src.max_item_);
1157
+ if (!tgt.max_item_) {
1158
+ tgt.max_item_.emplace(conditional_forward<FwdSk>(*src.max_item_));
1215
1159
  } else {
1216
1160
  if (tgt.comparator_(*tgt.max_item_, *src.max_item_))
1217
1161
  *tgt.max_item_ = conditional_forward<FwdSk>(*src.max_item_);
@@ -1230,20 +1174,6 @@ uint8_t quantiles_sketch<T, C, A>::lowest_zero_bit_starting_at(uint64_t bits, ui
1230
1174
  return pos;
1231
1175
  }
1232
1176
 
1233
- template<typename T, typename C, typename A>
1234
- class quantiles_sketch<T, C, A>::item_deleter {
1235
- public:
1236
- item_deleter(const A& allocator): allocator_(allocator) {}
1237
- void operator() (T* ptr) {
1238
- if (ptr != nullptr) {
1239
- ptr->~T();
1240
- allocator_.deallocate(ptr, 1);
1241
- }
1242
- }
1243
- private:
1244
- A allocator_;
1245
- };
1246
-
1247
1177
  template<typename T, typename C, typename A>
1248
1178
  class quantiles_sketch<T, C, A>::items_deleter {
1249
1179
  public:
@@ -20,7 +20,6 @@ add_executable(quantiles_test)
20
20
  target_link_libraries(quantiles_test quantiles common common_test_lib)
21
21
 
22
22
  set_target_properties(quantiles_test PROPERTIES
23
- CXX_STANDARD 11
24
23
  CXX_STANDARD_REQUIRED YES
25
24
  )
26
25
 
@@ -42,3 +41,17 @@ target_sources(quantiles_test
42
41
  quantiles_compatibility_test.cpp
43
42
  kolmogorov_smirnov_test.cpp
44
43
  )
44
+
45
+ if (SERDE_COMPAT)
46
+ target_sources(quantiles_test
47
+ PRIVATE
48
+ quantiles_sketch_deserialize_from_java_test.cpp
49
+ )
50
+ endif()
51
+
52
+ if (GENERATE)
53
+ target_sources(quantiles_test
54
+ PRIVATE
55
+ quantiles_sketch_serialize_for_java.cpp
56
+ )
57
+ endif()
@@ -0,0 +1,84 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #include <catch2/catch.hpp>
21
+ #include <fstream>
22
+ #include <quantiles_sketch.hpp>
23
+
24
+ namespace datasketches {
25
+
26
+ // assume the binary sketches for this test have been generated by datasketches-java code
27
+ // in the subdirectory called "java" in the root directory of this project
28
+ static std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH) + "../../java/";
29
+
30
+ TEST_CASE("quantiles double", "[serde_compat]") {
31
+ const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
32
+ for (const unsigned n: n_arr) {
33
+ std::ifstream is;
34
+ is.exceptions(std::ios::failbit | std::ios::badbit);
35
+ is.open(testBinaryInputPath + "quantiles_double_n" + std::to_string(n) + "_java.sk", std::ios::binary);
36
+ const auto sketch = quantiles_sketch<double>::deserialize(is);
37
+ REQUIRE(sketch.is_empty() == (n == 0));
38
+ REQUIRE(sketch.is_estimation_mode() == (n > quantiles_constants::DEFAULT_K));
39
+ REQUIRE(sketch.get_n() == n);
40
+ if (n > 0) {
41
+ REQUIRE(sketch.get_min_item() == 1.0);
42
+ REQUIRE(sketch.get_max_item() == static_cast<double>(n));
43
+ uint64_t weight = 0;
44
+ for (const auto pair: sketch) {
45
+ REQUIRE(pair.first >= sketch.get_min_item());
46
+ REQUIRE(pair.first <= sketch.get_max_item());
47
+ weight += pair.second;
48
+ }
49
+ REQUIRE(weight == sketch.get_n());
50
+ }
51
+ }
52
+ }
53
+
54
+ struct string_as_number_less {
55
+ bool operator()(const std::string& a, const std::string& b) const {
56
+ return std::stoi(a) < std::stoi(b);
57
+ }
58
+ };
59
+
60
+ TEST_CASE("quantiles string", "[serde_compat]") {
61
+ const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
62
+ for (const unsigned n: n_arr) {
63
+ std::ifstream is;
64
+ is.exceptions(std::ios::failbit | std::ios::badbit);
65
+ is.open(testBinaryInputPath + "quantiles_string_n" + std::to_string(n) + "_java.sk", std::ios::binary);
66
+ const auto sketch = quantiles_sketch<std::string, string_as_number_less>::deserialize(is);
67
+ REQUIRE(sketch.is_empty() == (n == 0));
68
+ REQUIRE(sketch.is_estimation_mode() == (n > quantiles_constants::DEFAULT_K));
69
+ REQUIRE(sketch.get_n() == n);
70
+ if (n > 0) {
71
+ REQUIRE(sketch.get_min_item() == "1");
72
+ REQUIRE(sketch.get_max_item() == std::to_string(n));
73
+ uint64_t weight = 0;
74
+ for (const auto pair: sketch) {
75
+ REQUIRE(std::stoi(pair.first) >= std::stoi(sketch.get_min_item()));
76
+ REQUIRE(std::stoi(pair.first) <= std::stoi(sketch.get_max_item()));
77
+ weight += pair.second;
78
+ }
79
+ REQUIRE(weight == sketch.get_n());
80
+ }
81
+ }
82
+ }
83
+
84
+ } /* namespace datasketches */