datasketches 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/datasketches/cpc_wrapper.cpp +12 -13
  4. data/ext/datasketches/ext.cpp +1 -1
  5. data/ext/datasketches/ext.h +4 -0
  6. data/ext/datasketches/extconf.rb +1 -1
  7. data/ext/datasketches/fi_wrapper.cpp +6 -8
  8. data/ext/datasketches/hll_wrapper.cpp +13 -14
  9. data/ext/datasketches/kll_wrapper.cpp +28 -76
  10. data/ext/datasketches/theta_wrapper.cpp +27 -41
  11. data/ext/datasketches/vo_wrapper.cpp +4 -6
  12. data/lib/datasketches/version.rb +1 -1
  13. data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
  14. data/vendor/datasketches-cpp/README.md +4 -4
  15. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +7 -0
  16. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +12 -0
  17. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +24 -0
  18. data/vendor/datasketches-cpp/common/test/integration_test.cpp +77 -0
  19. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +9 -1
  20. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +3 -0
  21. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +2 -2
  22. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +28 -19
  23. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +8 -5
  24. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +19 -14
  25. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +2 -2
  26. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +6 -6
  27. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +0 -6
  28. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +3 -3
  29. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -3
  30. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +9 -9
  31. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -0
  32. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +237 -0
  33. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +15 -10
  34. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +40 -28
  35. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +19 -13
  36. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +140 -124
  37. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +15 -12
  38. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
  39. data/vendor/datasketches-cpp/hll/CMakeLists.txt +3 -0
  40. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +32 -57
  41. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +9 -8
  42. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +2 -2
  43. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +34 -48
  44. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +10 -10
  45. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +45 -77
  46. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +11 -12
  47. data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +2 -2
  48. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +2 -2
  49. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +15 -14
  50. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +1 -1
  51. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +10 -21
  52. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +2 -3
  53. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +10 -21
  54. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +2 -3
  55. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +28 -55
  56. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +8 -8
  57. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +9 -11
  58. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +2 -1
  59. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +34 -31
  60. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +3 -28
  61. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +1 -1
  62. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +1 -1
  63. data/vendor/datasketches-cpp/hll/include/hll.hpp +6 -34
  64. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +7 -7
  65. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +2 -2
  66. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +3 -3
  67. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +2 -2
  68. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +46 -50
  69. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +1 -1
  70. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +3 -3
  71. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +10 -3
  72. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +93 -75
  73. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +11 -10
  74. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +45 -42
  75. data/vendor/datasketches-cpp/python/CMakeLists.txt +2 -0
  76. data/vendor/datasketches-cpp/python/README.md +6 -3
  77. data/vendor/datasketches-cpp/python/src/datasketches.cpp +2 -0
  78. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -2
  79. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +3 -1
  80. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +246 -0
  81. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +36 -26
  82. data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -1
  83. data/vendor/datasketches-cpp/python/tests/kll_test.py +3 -3
  84. data/vendor/datasketches-cpp/python/tests/req_test.py +126 -0
  85. data/vendor/datasketches-cpp/python/tests/theta_test.py +28 -3
  86. data/vendor/datasketches-cpp/req/CMakeLists.txt +60 -0
  87. data/vendor/datasketches-cpp/{tuple/include/theta_a_not_b_experimental_impl.hpp → req/include/req_common.hpp} +17 -8
  88. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +137 -0
  89. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +501 -0
  90. data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +69 -0
  91. data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +60 -0
  92. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +395 -0
  93. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +810 -0
  94. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +43 -0
  95. data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
  96. data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
  97. data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
  98. data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
  99. data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
  100. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +128 -0
  101. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +494 -0
  102. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +10 -9
  103. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +82 -70
  104. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +5 -5
  105. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +7 -7
  106. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -0
  107. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +96 -0
  108. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -31
  109. data/vendor/datasketches-cpp/setup.py +5 -3
  110. data/vendor/datasketches-cpp/theta/CMakeLists.txt +30 -3
  111. data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_sampled_sets.hpp +2 -1
  112. data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_theta_sketched_sets.hpp +1 -1
  113. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +12 -29
  114. data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +5 -46
  115. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_comparators.hpp +0 -0
  116. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_constants.hpp +2 -0
  117. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_helpers.hpp +0 -0
  118. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +22 -29
  119. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base.hpp +0 -0
  120. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base_impl.hpp +0 -0
  121. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +8 -90
  122. data/vendor/datasketches-cpp/{tuple/test/theta_union_experimental_test.cpp → theta/include/theta_jaccard_similarity.hpp} +11 -18
  123. data/vendor/datasketches-cpp/{tuple/include/jaccard_similarity.hpp → theta/include/theta_jaccard_similarity_base.hpp} +6 -22
  124. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base.hpp +0 -0
  125. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base_impl.hpp +5 -0
  126. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +132 -266
  127. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +200 -650
  128. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +27 -60
  129. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base.hpp +1 -1
  130. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base_impl.hpp +5 -0
  131. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +13 -69
  132. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base.hpp +3 -19
  133. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base_impl.hpp +6 -1
  134. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  135. data/vendor/datasketches-cpp/{tuple → theta}/test/theta_jaccard_similarity_test.cpp +2 -3
  136. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +37 -234
  137. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +3 -35
  138. data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +38 -0
  139. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +28 -13
  140. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +6 -6
  141. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +1 -6
  142. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -4
  143. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -4
  144. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +2 -1
  145. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +2 -2
  146. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +1 -4
  147. metadata +43 -34
  148. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +0 -53
  149. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +0 -78
  150. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +0 -43
  151. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +0 -393
  152. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +0 -481
  153. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +0 -88
  154. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +0 -47
  155. data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +0 -250
  156. data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
  157. data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
  158. data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
  159. data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +0 -224
  160. data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +0 -247
@@ -20,6 +20,7 @@ import unittest
20
20
  from datasketches import theta_sketch, update_theta_sketch
21
21
  from datasketches import compact_theta_sketch, theta_union
22
22
  from datasketches import theta_intersection, theta_a_not_b
23
+ from datasketches import theta_jaccard_similarity
23
24
 
24
25
  class ThetaTest(unittest.TestCase):
25
26
  def test_theta_basic_example(self):
@@ -39,9 +40,9 @@ class ThetaTest(unittest.TestCase):
39
40
  self.assertLessEqual(sk.get_lower_bound(1), n)
40
41
  self.assertGreaterEqual(sk.get_upper_bound(1), n)
41
42
 
42
- # serialize for storage and reconstruct
43
- sk_bytes = sk.serialize()
44
- new_sk = update_theta_sketch.deserialize(sk_bytes)
43
+ # compact and serialize for storage, then reconstruct
44
+ sk_bytes = sk.compact().serialize()
45
+ new_sk = compact_theta_sketch.deserialize(sk_bytes)
45
46
 
46
47
  # estimate remains unchanged
47
48
  self.assertFalse(sk.is_empty())
@@ -109,6 +110,30 @@ class ThetaTest(unittest.TestCase):
109
110
  self.assertGreaterEqual(result.get_upper_bound(1), 3 * n / 4)
110
111
 
111
112
 
113
+ # JACCARD SIMILARITY
114
+ # Jaccard Similarity measure returns (lower_bound, estimate, upper_bound)
115
+ jac = theta_jaccard_similarity.jaccard(sk1, sk2)
116
+
117
+ # we can check that results are in the expected order
118
+ self.assertLess(jac[0], jac[1])
119
+ self.assertLess(jac[1], jac[2])
120
+
121
+ # checks for sketch equivalency
122
+ self.assertTrue(theta_jaccard_similarity.exactly_equal(sk1, sk1))
123
+ self.assertFalse(theta_jaccard_similarity.exactly_equal(sk1, sk2))
124
+
125
+ # we can apply a check for similarity or dissimilarity at a
126
+ # given threshold, at 97.7% confidence.
127
+
128
+ # check that the Jaccard Index is at most (upper bound) 0.2.
129
+ # exact result would be 1/7
130
+ self.assertTrue(theta_jaccard_similarity.dissimilarity_test(sk1, sk2, 0.2))
131
+
132
+ # check that the Jaccard Index is at least (lower bound) 0.7
133
+ # exact result would be 3/4, using result from A NOT B test
134
+ self.assertTrue(theta_jaccard_similarity.similarity_test(sk1, result, 0.7))
135
+
136
+
112
137
  def generate_theta_sketch(self, n, k, offset=0):
113
138
  sk = update_theta_sketch(k)
114
139
  for i in range(0, n):
@@ -0,0 +1,60 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ add_library(req INTERFACE)
19
+
20
+ add_library(${PROJECT_NAME}::REQ ALIAS req)
21
+
22
+ if (BUILD_TESTS)
23
+ add_subdirectory(test)
24
+ endif()
25
+
26
+ target_include_directories(req
27
+ INTERFACE
28
+ $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
29
+ $<INSTALL_INTERFACE:$<INSTALL_PREFIX>/include>
30
+ )
31
+
32
+ target_link_libraries(req INTERFACE common)
33
+ target_compile_features(req INTERFACE cxx_std_11)
34
+
35
+ set(req_HEADERS "")
36
+ list(APPEND req_HEADERS "include/req_common.hpp")
37
+ list(APPEND req_HEADERS "include/req_sketch.hpp")
38
+ list(APPEND req_HEADERS "include/req_sketch_impl.hpp")
39
+ list(APPEND req_HEADERS "include/req_compactor.hpp")
40
+ list(APPEND req_HEADERS "include/req_compactor_impl.hpp")
41
+ list(APPEND req_HEADERS "include/req_quantile_calculator.hpp")
42
+ list(APPEND req_HEADERS "include/req_quantile_calculator_impl.hpp")
43
+
44
+ install(TARGETS req
45
+ EXPORT ${PROJECT_NAME}
46
+ )
47
+
48
+ install(FILES ${req_HEADERS}
49
+ DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
50
+
51
+ target_sources(req
52
+ INTERFACE
53
+ ${CMAKE_CURRENT_SOURCE_DIR}/include/req_common.hpp
54
+ ${CMAKE_CURRENT_SOURCE_DIR}/include/req_sketch.hpp
55
+ ${CMAKE_CURRENT_SOURCE_DIR}/include/req_sketch_impl.hpp
56
+ ${CMAKE_CURRENT_SOURCE_DIR}/include/req_compactor.hpp
57
+ ${CMAKE_CURRENT_SOURCE_DIR}/include/req_compactor_impl.hpp
58
+ ${CMAKE_CURRENT_SOURCE_DIR}/include/req_quantile_calculator.hpp
59
+ ${CMAKE_CURRENT_SOURCE_DIR}/include/req_quantile_calculator_impl.hpp
60
+ )
@@ -17,17 +17,26 @@
17
17
  * under the License.
18
18
  */
19
19
 
20
+ #ifndef REQ_COMMON_HPP_
21
+ #define REQ_COMMON_HPP_
22
+
23
+ #include <random>
24
+ #include <chrono>
25
+
26
+ #include "serde.hpp"
27
+ #include "common_defs.hpp"
28
+
20
29
  namespace datasketches {
21
30
 
22
- template<typename A>
23
- theta_a_not_b_experimental<A>::theta_a_not_b_experimental(uint64_t seed, const A& allocator):
24
- state_(seed, allocator)
25
- {}
31
+ // TODO: have a common random bit with KLL
32
+ static std::independent_bits_engine<std::mt19937, 1, unsigned> req_random_bit(std::chrono::system_clock::now().time_since_epoch().count());
26
33
 
27
- template<typename A>
28
- template<typename FwdSketch, typename Sketch>
29
- auto theta_a_not_b_experimental<A>::compute(FwdSketch&& a, const Sketch& b, bool ordered) const -> CompactSketch {
30
- return state_.compute(std::forward<FwdSketch>(a), b, ordered);
34
+ namespace req_constants {
35
+ static const uint16_t MIN_K = 4;
36
+ static const uint8_t INIT_NUM_SECTIONS = 3;
37
+ static const unsigned MULTIPLIER = 2;
31
38
  }
32
39
 
33
40
  } /* namespace datasketches */
41
+
42
+ #endif
@@ -0,0 +1,137 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef REQ_COMPACTOR_HPP_
21
+ #define REQ_COMPACTOR_HPP_
22
+
23
+ #include <memory>
24
+
25
+ namespace datasketches {
26
+
27
+ template<
28
+ typename T,
29
+ typename Comparator,
30
+ typename Allocator
31
+ >
32
+ class req_compactor {
33
+ public:
34
+ req_compactor(bool hra, uint8_t lg_weight, uint32_t section_size, const Allocator& allocator, bool sorted = true);
35
+ ~req_compactor();
36
+ req_compactor(const req_compactor& other);
37
+ req_compactor(req_compactor&& other) noexcept;
38
+ req_compactor& operator=(const req_compactor& other);
39
+ req_compactor& operator=(req_compactor&& other);
40
+
41
+ bool is_sorted() const;
42
+ uint32_t get_num_items() const;
43
+ uint32_t get_nom_capacity() const;
44
+ uint8_t get_lg_weight() const;
45
+ const T* begin() const;
46
+ const T* end() const;
47
+ T* begin();
48
+ T* end();
49
+
50
+ template<bool inclusive>
51
+ uint64_t compute_weight(const T& item) const;
52
+
53
+ template<typename FwdT>
54
+ void append(FwdT&& item);
55
+
56
+ template<typename FwdC>
57
+ void merge(FwdC&& other);
58
+
59
+ void sort();
60
+
61
+ std::pair<uint32_t, uint32_t> compact(req_compactor& next);
62
+
63
+ /**
64
+ * Computes size needed to serialize the current state of the compactor.
65
+ * This version is for fixed-size arithmetic types (integral and floating point).
66
+ * @return size in bytes needed to serialize this compactor
67
+ */
68
+ template<typename S, typename TT = T, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
69
+ size_t get_serialized_size_bytes(const S& serde) const;
70
+
71
+ /**
72
+ * Computes size needed to serialize the current state of the compactor.
73
+ * This version is for all other types and can be expensive since every item needs to be looked at.
74
+ * @return size in bytes needed to serialize this compactor
75
+ */
76
+ template<typename S, typename TT = T, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
77
+ size_t get_serialized_size_bytes(const S& serde) const;
78
+
79
+ template<typename S>
80
+ void serialize(std::ostream& os, const S& serde) const;
81
+
82
+ template<typename S>
83
+ size_t serialize(void* dst, size_t capacity, const S& serde) const;
84
+
85
+ template<typename S>
86
+ static req_compactor deserialize(std::istream& is, const S& serde, const Allocator& allocator, bool sorted, bool hra);
87
+
88
+ template<typename S>
89
+ static std::pair<req_compactor, size_t> deserialize(const void* bytes, size_t size, const S& serde, const Allocator& allocator, bool sorted, bool hra);
90
+
91
+ template<typename S>
92
+ static req_compactor deserialize(std::istream& is, const S& serde, const Allocator& allocator, bool sorted, uint16_t k, uint8_t num_items, bool hra);
93
+
94
+ template<typename S>
95
+ static std::pair<req_compactor, size_t> deserialize(const void* bytes, size_t size, const S& serde, const Allocator& allocator, bool sorted, uint16_t k, uint8_t num_items, bool hra);
96
+
97
+ private:
98
+ Allocator allocator_;
99
+ uint8_t lg_weight_;
100
+ bool hra_;
101
+ bool coin_; // random bit for compaction
102
+ bool sorted_;
103
+ float section_size_raw_;
104
+ uint32_t section_size_;
105
+ uint8_t num_sections_;
106
+ uint64_t state_; // state of the deterministic compaction schedule
107
+ uint32_t num_items_;
108
+ uint32_t capacity_;
109
+ T* items_;
110
+
111
+ bool ensure_enough_sections();
112
+ std::pair<uint32_t, uint32_t> compute_compaction_range(uint32_t secs_to_compact) const;
113
+ void grow(size_t new_capacity);
114
+ void ensure_space(size_t num);
115
+
116
+ static uint32_t nearest_even(float value);
117
+
118
+ template<typename InIter, typename OutIter>
119
+ static void promote_evens_or_odds(InIter from, InIter to, bool flag, OutIter dst);
120
+
121
+ // for deserialization
122
+ class items_deleter;
123
+ req_compactor(bool hra, uint8_t lg_weight, bool sorted, float section_size_raw, uint8_t num_sections, uint64_t state, std::unique_ptr<T, items_deleter> items, uint32_t num_items, const Allocator& allocator);
124
+
125
+ template<typename S>
126
+ static std::unique_ptr<T, items_deleter> deserialize_items(std::istream& is, const S& serde, const Allocator& allocator, size_t num);
127
+
128
+ template<typename S>
129
+ static std::pair<std::unique_ptr<T, items_deleter>, size_t> deserialize_items(const void* bytes, size_t size, const S& serde, const Allocator& allocator, size_t num);
130
+
131
+ };
132
+
133
+ } /* namespace datasketches */
134
+
135
+ #include "req_compactor_impl.hpp"
136
+
137
+ #endif
@@ -0,0 +1,501 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef REQ_COMPACTOR_IMPL_HPP_
21
+ #define REQ_COMPACTOR_IMPL_HPP_
22
+
23
+ #include <stdexcept>
24
+ #include <cmath>
25
+ #include <algorithm>
26
+
27
+ #include "count_zeros.hpp"
28
+ #include "conditional_forward.hpp"
29
+
30
+ #include <iomanip>
31
+
32
+ namespace datasketches {
33
+
34
+ template<typename T, typename C, typename A>
35
+ req_compactor<T, C, A>::req_compactor(bool hra, uint8_t lg_weight, uint32_t section_size, const A& allocator, bool sorted):
36
+ allocator_(allocator),
37
+ lg_weight_(lg_weight),
38
+ hra_(hra),
39
+ coin_(false),
40
+ sorted_(sorted),
41
+ section_size_raw_(section_size),
42
+ section_size_(section_size),
43
+ num_sections_(req_constants::INIT_NUM_SECTIONS),
44
+ state_(0),
45
+ num_items_(0),
46
+ capacity_(2 * get_nom_capacity()),
47
+ items_(allocator_.allocate(capacity_))
48
+ {}
49
+
50
+ template<typename T, typename C, typename A>
51
+ req_compactor<T, C, A>::~req_compactor() {
52
+ if (items_ != nullptr) {
53
+ for (auto it = begin(); it != end(); ++it) (*it).~T();
54
+ allocator_.deallocate(items_, capacity_);
55
+ }
56
+ }
57
+
58
+ template<typename T, typename C, typename A>
59
+ req_compactor<T, C, A>::req_compactor(const req_compactor& other):
60
+ allocator_(other.allocator_),
61
+ lg_weight_(other.lg_weight_),
62
+ hra_(other.hra_),
63
+ coin_(other.coin_),
64
+ sorted_(other.sorted_),
65
+ section_size_raw_(other.section_size_raw_),
66
+ section_size_(other.section_size_),
67
+ num_sections_(other.num_sections_),
68
+ state_(other.state_),
69
+ num_items_(other.num_items_),
70
+ capacity_(other.capacity_),
71
+ items_(nullptr)
72
+ {
73
+ if (other.items_ != nullptr) {
74
+ items_ = allocator_.allocate(capacity_);
75
+ const size_t from = hra_ ? capacity_ - num_items_ : 0;
76
+ const size_t to = hra_ ? capacity_ : num_items_;
77
+ for (size_t i = from; i < to; ++i) new (items_ + i) T(other.items_[i]);
78
+ }
79
+ }
80
+
81
+ template<typename T, typename C, typename A>
82
+ req_compactor<T, C, A>::req_compactor(req_compactor&& other) noexcept :
83
+ allocator_(std::move(other.allocator_)),
84
+ lg_weight_(other.lg_weight_),
85
+ hra_(other.hra_),
86
+ coin_(other.coin_),
87
+ sorted_(other.sorted_),
88
+ section_size_raw_(other.section_size_raw_),
89
+ section_size_(other.section_size_),
90
+ num_sections_(other.num_sections_),
91
+ state_(other.state_),
92
+ num_items_(other.num_items_),
93
+ capacity_(other.capacity_),
94
+ items_(other.items_)
95
+ {
96
+ other.items_ = nullptr;
97
+ }
98
+
99
+ template<typename T, typename C, typename A>
100
+ req_compactor<T, C, A>& req_compactor<T, C, A>::operator=(const req_compactor& other) {
101
+ req_compactor copy(other);
102
+ std::swap(allocator_, copy.allocator_);
103
+ std::swap(lg_weight_, copy.lg_weight_);
104
+ std::swap(hra_, copy.hra_);
105
+ std::swap(coin_, copy.coin_);
106
+ std::swap(sorted_, copy.sorted_);
107
+ std::swap(section_size_raw_, copy.section_size_raw_);
108
+ std::swap(section_size_, copy.section_size_);
109
+ std::swap(num_sections_, copy.num_sections_);
110
+ std::swap(state_, copy.state_);
111
+ std::swap(num_items_, copy.num_items_);
112
+ std::swap(capacity_, copy.capacity_);
113
+ std::swap(items_, copy.items_);
114
+ return *this;
115
+ }
116
+
117
+ template<typename T, typename C, typename A>
118
+ req_compactor<T, C, A>& req_compactor<T, C, A>::operator=(req_compactor&& other) {
119
+ std::swap(allocator_, other.allocator_);
120
+ std::swap(lg_weight_, other.lg_weight_);
121
+ std::swap(hra_, other.hra_);
122
+ std::swap(coin_, other.coin_);
123
+ std::swap(sorted_, other.sorted_);
124
+ std::swap(section_size_raw_, other.section_size_raw_);
125
+ std::swap(section_size_, other.section_size_);
126
+ std::swap(num_sections_, other.num_sections_);
127
+ std::swap(state_, other.state_);
128
+ std::swap(num_items_, other.num_items_);
129
+ std::swap(capacity_, other.capacity_);
130
+ std::swap(items_, other.items_);
131
+ return *this;
132
+ }
133
+
134
+ template<typename T, typename C, typename A>
135
+ bool req_compactor<T, C, A>::is_sorted() const {
136
+ return sorted_;
137
+ }
138
+
139
+ template<typename T, typename C, typename A>
140
+ uint32_t req_compactor<T, C, A>::get_num_items() const {
141
+ return num_items_;
142
+ }
143
+
144
+ template<typename T, typename C, typename A>
145
+ uint32_t req_compactor<T, C, A>::get_nom_capacity() const {
146
+ return req_constants::MULTIPLIER * num_sections_ * section_size_;
147
+ }
148
+
149
+ template<typename T, typename C, typename A>
150
+ uint8_t req_compactor<T, C, A>::get_lg_weight() const {
151
+ return lg_weight_;
152
+ }
153
+
154
+ template<typename T, typename C, typename A>
155
+ template<bool inclusive>
156
+ uint64_t req_compactor<T, C, A>::compute_weight(const T& item) const {
157
+ if (!sorted_) const_cast<req_compactor*>(this)->sort(); // allow sorting as a side effect
158
+ auto it = inclusive ?
159
+ std::upper_bound(begin(), end(), item, C()) :
160
+ std::lower_bound(begin(), end(), item, C());
161
+ return std::distance(begin(), it) << lg_weight_;
162
+ }
163
+
164
+ template<typename T, typename C, typename A>
165
+ template<typename FwdT>
166
+ void req_compactor<T, C, A>::append(FwdT&& item) {
167
+ if (num_items_ == capacity_) grow(capacity_ + get_nom_capacity());
168
+ const size_t i = hra_ ? capacity_ - num_items_ - 1 : num_items_;
169
+ new (items_ + i) T(std::forward<FwdT>(item));
170
+ ++num_items_;
171
+ if (num_items_ > 1) sorted_ = false;
172
+ }
173
+
174
+ template<typename T, typename C, typename A>
175
+ void req_compactor<T, C, A>::grow(size_t new_capacity) {
176
+ T* new_items = allocator_.allocate(new_capacity);
177
+ size_t new_i = hra_ ? new_capacity - num_items_ : 0;
178
+ for (auto it = begin(); it != end(); ++it, ++new_i) {
179
+ new (new_items + new_i) T(std::move(*it));
180
+ (*it).~T();
181
+ }
182
+ allocator_.deallocate(items_, capacity_);
183
+ items_ = new_items;
184
+ capacity_ = new_capacity;
185
+ }
186
+
187
+ template<typename T, typename C, typename A>
188
+ void req_compactor<T, C, A>::ensure_space(size_t num) {
189
+ if (num_items_ + num > capacity_) grow(num_items_ + num + get_nom_capacity());
190
+ }
191
+
192
+ template<typename T, typename C, typename A>
193
+ const T* req_compactor<T, C, A>::begin() const {
194
+ return items_ + (hra_ ? capacity_ - num_items_ : 0);
195
+ }
196
+
197
+ template<typename T, typename C, typename A>
198
+ const T* req_compactor<T, C, A>::end() const {
199
+ return items_ + (hra_ ? capacity_ : num_items_);
200
+ }
201
+
202
+ template<typename T, typename C, typename A>
203
+ T* req_compactor<T, C, A>::begin() {
204
+ return items_ + (hra_ ? capacity_ - num_items_ : 0);
205
+ }
206
+
207
+ template<typename T, typename C, typename A>
208
+ T* req_compactor<T, C, A>::end() {
209
+ return items_ + (hra_ ? capacity_ : num_items_);
210
+ }
211
+
212
+ template<typename T, typename C, typename A>
213
+ template<typename FwdC>
214
+ void req_compactor<T, C, A>::merge(FwdC&& other) {
215
+ // TODO: swap if other is larger?
216
+ if (lg_weight_ != other.lg_weight_) throw std::logic_error("weight mismatch");
217
+ state_ |= other.state_;
218
+ while (ensure_enough_sections()) {}
219
+ ensure_space(other.get_num_items());
220
+ sort();
221
+ auto middle = hra_ ? begin() : end();
222
+ auto from = hra_ ? begin() - other.get_num_items() : end();
223
+ auto to = from + other.get_num_items();
224
+ auto other_it = other.begin();
225
+ for (auto it = from; it != to; ++it, ++other_it) new (it) T(conditional_forward<FwdC>(*other_it));
226
+ if (!other.sorted_) std::sort(from, to, C());
227
+ if (num_items_ > 0) std::inplace_merge(hra_ ? from : begin(), middle, hra_ ? end() : to, C());
228
+ num_items_ += other.get_num_items();
229
+ }
230
+
231
+ template<typename T, typename C, typename A>
232
+ void req_compactor<T, C, A>::sort() {
233
+ if (!sorted_) {
234
+ std::sort(begin(), end(), C());
235
+ sorted_ = true;
236
+ }
237
+ }
238
+
239
+ template<typename T, typename C, typename A>
240
+ std::pair<uint32_t, uint32_t> req_compactor<T, C, A>::compact(req_compactor& next) {
241
+ const uint32_t starting_nom_capacity = get_nom_capacity();
242
+ // choose a part of the buffer to compact
243
+ const uint32_t secs_to_compact = std::min(static_cast<uint32_t>(count_trailing_zeros_in_u32(~state_) + 1), static_cast<uint32_t>(num_sections_));
244
+ auto compaction_range = compute_compaction_range(secs_to_compact);
245
+ if (compaction_range.second - compaction_range.first < 2) throw std::logic_error("compaction range error");
246
+
247
+ if ((state_ & 1) == 1) { coin_ = !coin_; } // for odd flip coin;
248
+ else { coin_ = req_random_bit(); } // random coin flip
249
+
250
+ const auto num = (compaction_range.second - compaction_range.first) / 2;
251
+ next.ensure_space(num);
252
+ auto next_middle = hra_ ? next.begin() : next.end();
253
+ auto next_empty = hra_ ? next.begin() - num : next.end();
254
+ promote_evens_or_odds(begin() + compaction_range.first, begin() + compaction_range.second, coin_, next_empty);
255
+ next.num_items_ += num;
256
+ std::inplace_merge(next.begin(), next_middle, next.end(), C());
257
+ for (size_t i = compaction_range.first; i < compaction_range.second; ++i) (*(begin() + i)).~T();
258
+ num_items_ -= compaction_range.second - compaction_range.first;
259
+
260
+ ++state_;
261
+ ensure_enough_sections();
262
+ return std::pair<uint32_t, uint32_t>(
263
+ num,
264
+ get_nom_capacity() - starting_nom_capacity
265
+ );
266
+ }
267
+
268
+ template<typename T, typename C, typename A>
269
+ bool req_compactor<T, C, A>::ensure_enough_sections() {
270
+ const float ssr = section_size_raw_ / sqrt(2);
271
+ const uint32_t ne = nearest_even(ssr);
272
+ if (state_ >= static_cast<uint64_t>(1 << (num_sections_ - 1)) && ne >= req_constants::MIN_K) {
273
+ section_size_raw_ = ssr;
274
+ section_size_ = ne;
275
+ num_sections_ <<= 1;
276
+ if (capacity_ < 2 * get_nom_capacity()) grow(2 * get_nom_capacity());
277
+ return true;
278
+ }
279
+ return false;
280
+ }
281
+
282
+ template<typename T, typename C, typename A>
283
+ std::pair<uint32_t, uint32_t> req_compactor<T, C, A>::compute_compaction_range(uint32_t secs_to_compact) const {
284
+ uint32_t non_compact = get_nom_capacity() / 2 + (num_sections_ - secs_to_compact) * section_size_;
285
+ // make compacted region even
286
+ if (((num_items_ - non_compact) & 1) == 1) ++non_compact;
287
+ const size_t low = hra_ ? 0 : non_compact;
288
+ const size_t high = hra_ ? num_items_ - non_compact : num_items_;
289
+ return std::pair<uint32_t, uint32_t>(low, high);
290
+ }
291
+
292
+ template<typename T, typename C, typename A>
293
+ uint32_t req_compactor<T, C, A>::nearest_even(float value) {
294
+ return static_cast<uint32_t>(round(value / 2)) << 1;
295
+ }
296
+
297
+ template<typename T, typename C, typename A>
298
+ template<typename InIter, typename OutIter>
299
+ void req_compactor<T, C, A>::promote_evens_or_odds(InIter from, InIter to, bool odds, OutIter dst) {
300
+ if (from == to) return;
301
+ InIter i = from;
302
+ if (odds) ++i;
303
+ while (i != to) {
304
+ new (dst) T(std::move(*i));
305
+ ++dst;
306
+ ++i;
307
+ if (i == to) break;
308
+ ++i;
309
+ }
310
+ }
311
+
312
+ // helpers for integral types
313
// Reads sizeof(T) raw bytes from the stream and returns them as a T.
// The value is zero-initialized first, so a failed or short read yields a
// deterministic result instead of an uninitialized object. The caller is
// still expected to check the stream state to detect read errors.
template<typename T>
static inline T read(std::istream& is) {
  T value{};
  is.read(reinterpret_cast<char*>(&value), sizeof(T));
  return value;
}
319
+
320
// Writes the object representation of value (sizeof(T) raw bytes) to the
// stream.
template<typename T>
static inline void write(std::ostream& os, T value) {
  const char* const raw_bytes = reinterpret_cast<const char*>(&value);
  os.write(raw_bytes, sizeof(value));
}
324
+
325
+ // implementation for fixed-size arithmetic types (integral and floating point)
326
+ template<typename T, typename C, typename A>
327
+ template<typename S, typename TT, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type>
328
+ size_t req_compactor<T, C, A>::get_serialized_size_bytes(const S&) const {
329
+ return sizeof(state_) + sizeof(section_size_raw_) + sizeof(lg_weight_) + sizeof(num_sections_) +
330
+ sizeof(uint16_t) + // padding
331
+ sizeof(uint32_t) + // num_items
332
+ sizeof(TT) * num_items_;
333
+ }
334
+
335
+ // implementation for all other types
336
+ template<typename T, typename C, typename A>
337
+ template<typename S, typename TT, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type>
338
+ size_t req_compactor<T, C, A>::get_serialized_size_bytes(const S& serde) const {
339
+ size_t size = sizeof(state_) + sizeof(section_size_raw_) + sizeof(lg_weight_) + sizeof(num_sections_) +
340
+ sizeof(uint16_t) + // padding
341
+ sizeof(uint32_t); // num_items
342
+ for (auto it = begin(); it != end(); ++it) size += serde.size_of_item(*it);
343
+ return size;
344
+ }
345
+
346
+ template<typename T, typename C, typename A>
347
+ template<typename S>
348
+ void req_compactor<T, C, A>::serialize(std::ostream& os, const S& serde) const {
349
+ write(os, state_);
350
+ write(os, section_size_raw_);
351
+ write(os, lg_weight_);
352
+ write(os, num_sections_);
353
+ const uint16_t padding = 0;
354
+ write(os, padding);
355
+ write(os, num_items_);
356
+ serde.serialize(os, begin(), num_items_);
357
+ }
358
+
359
+ template<typename T, typename C, typename A>
360
+ template<typename S>
361
+ size_t req_compactor<T, C, A>::serialize(void* dst, size_t capacity, const S& serde) const {
362
+ uint8_t* ptr = static_cast<uint8_t*>(dst);
363
+ const uint8_t* end_ptr = ptr + capacity;
364
+ ptr += copy_to_mem(state_, ptr);
365
+ ptr += copy_to_mem(section_size_raw_, ptr);
366
+ ptr += copy_to_mem(lg_weight_, ptr);
367
+ ptr += copy_to_mem(num_sections_, ptr);
368
+ const uint16_t padding = 0;
369
+ ptr += copy_to_mem(padding, ptr);
370
+ ptr += copy_to_mem(num_items_, ptr);
371
+ ptr += serde.serialize(ptr, end_ptr - ptr, begin(), num_items_);
372
+ return ptr - static_cast<uint8_t*>(dst);
373
+ }
374
+
375
+ template<typename T, typename C, typename A>
376
+ template<typename S>
377
+ req_compactor<T, C, A> req_compactor<T, C, A>::deserialize(std::istream& is, const S& serde, const A& allocator, bool sorted, bool hra) {
378
+ auto state = read<decltype(state_)>(is);
379
+ auto section_size_raw = read<decltype(section_size_raw_)>(is);
380
+ auto lg_weight = read<decltype(lg_weight_)>(is);
381
+ auto num_sections = read<decltype(num_sections_)>(is);
382
+ read<uint16_t>(is); // padding
383
+ auto num_items = read<uint32_t>(is);
384
+ auto items = deserialize_items(is, serde, allocator, num_items);
385
+ return req_compactor(hra, lg_weight, sorted, section_size_raw, num_sections, state, std::move(items), num_items, allocator);
386
+ }
387
+
388
+ template<typename T, typename C, typename A>
389
+ template<typename S>
390
+ req_compactor<T, C, A> req_compactor<T, C, A>::deserialize(std::istream& is, const S& serde, const A& allocator, bool sorted, uint16_t k, uint8_t num_items, bool hra) {
391
+ auto items = deserialize_items(is, serde, allocator, num_items);
392
+ return req_compactor(hra, 0, sorted, k, req_constants::INIT_NUM_SECTIONS, 0, std::move(items), num_items, allocator);
393
+ }
394
+
395
+ template<typename T, typename C, typename A>
396
+ template<typename S>
397
+ auto req_compactor<T, C, A>::deserialize_items(std::istream& is, const S& serde, const A& allocator, size_t num)
398
+ -> std::unique_ptr<T, items_deleter> {
399
+ A alloc(allocator);
400
+ std::unique_ptr<T, items_deleter> items(alloc.allocate(num), items_deleter(allocator, false, num));
401
+ serde.deserialize(is, items.get(), num);
402
+ // serde did not throw, enable destructors
403
+ items.get_deleter().set_destroy(true);
404
+ if (!is.good()) throw std::runtime_error("error reading from std::istream");
405
+ return std::move(items);
406
+ }
407
+
408
+ template<typename T, typename C, typename A>
409
+ template<typename S>
410
+ std::pair<req_compactor<T, C, A>, size_t> req_compactor<T, C, A>::deserialize(const void* bytes, size_t size, const S& serde, const A& allocator, bool sorted, bool hra) {
411
+ ensure_minimum_memory(size, 8);
412
+ const char* ptr = static_cast<const char*>(bytes);
413
+ const char* end_ptr = static_cast<const char*>(bytes) + size;
414
+
415
+ uint64_t state;
416
+ ptr += copy_from_mem(ptr, state);
417
+ float section_size_raw;
418
+ ptr += copy_from_mem(ptr, section_size_raw);
419
+ uint8_t lg_weight;
420
+ ptr += copy_from_mem(ptr, lg_weight);
421
+ uint8_t num_sections;
422
+ ptr += copy_from_mem(ptr, num_sections);
423
+ ptr += 2; // padding
424
+ uint32_t num_items;
425
+ ptr += copy_from_mem(ptr, num_items);
426
+ auto pair = deserialize_items(ptr, end_ptr - ptr, serde, allocator, num_items);
427
+ ptr += pair.second;
428
+ return std::pair<req_compactor, size_t>(
429
+ req_compactor(hra, lg_weight, sorted, section_size_raw, num_sections, state, std::move(pair.first), num_items, allocator),
430
+ ptr - static_cast<const char*>(bytes)
431
+ );
432
+ }
433
+
434
+ template<typename T, typename C, typename A>
435
+ template<typename S>
436
+ std::pair<req_compactor<T, C, A>, size_t> req_compactor<T, C, A>::deserialize(const void* bytes, size_t size, const S& serde, const A& allocator, bool sorted, uint16_t k, uint8_t num_items, bool hra) {
437
+ auto pair = deserialize_items(bytes, size, serde, allocator, num_items);
438
+ return std::pair<req_compactor, size_t>(
439
+ req_compactor(hra, 0, sorted, k, req_constants::INIT_NUM_SECTIONS, 0, std::move(pair.first), num_items, allocator),
440
+ pair.second
441
+ );
442
+ }
443
+
444
+ template<typename T, typename C, typename A>
445
+ template<typename S>
446
+ auto req_compactor<T, C, A>::deserialize_items(const void* bytes, size_t size, const S& serde, const A& allocator, size_t num)
447
+ -> std::pair<std::unique_ptr<T, items_deleter>, size_t> {
448
+ const char* ptr = static_cast<const char*>(bytes);
449
+ const char* end_ptr = static_cast<const char*>(bytes) + size;
450
+ A alloc(allocator);
451
+ std::unique_ptr<T, items_deleter> items(alloc.allocate(num), items_deleter(allocator, false, num));
452
+ ptr += serde.deserialize(ptr, end_ptr - ptr, items.get(), num);
453
+ // serde did not throw, enable destructors
454
+ items.get_deleter().set_destroy(true);
455
+ return std::pair<std::unique_ptr<T, items_deleter>, size_t>(
456
+ std::move(items),
457
+ ptr - static_cast<const char*>(bytes)
458
+ );
459
+ }
460
+
461
+
462
// Constructs a compactor from already materialized parts (used by the
// deserialize functions above). Ownership of the item buffer is taken over
// from the unique_ptr, so from here on the buffer is managed through the raw
// items_ pointer.
template<typename T, typename C, typename A>
req_compactor<T, C, A>::req_compactor(bool hra, uint8_t lg_weight, bool sorted, float section_size_raw, uint8_t num_sections, uint64_t state, std::unique_ptr<T, items_deleter> items, uint32_t num_items, const A& allocator):
allocator_(allocator),
lg_weight_(lg_weight),
hra_(hra),
coin_(req_random_bit()), // the coin is not passed in; it starts from a fresh random bit
sorted_(sorted),
section_size_raw_(section_size_raw),
section_size_(nearest_even(section_size_raw)), // derived from the raw size, not passed in
num_sections_(num_sections),
state_(state),
num_items_(num_items),
capacity_(num_items), // the buffer handed over is treated as exactly full
items_(items.release()) // take ownership away from the unique_ptr
{}
477
+
478
+ template<typename T, typename C, typename A>
479
+ class req_compactor<T, C, A>::items_deleter {
480
+ public:
481
+ items_deleter(const A& allocator, bool destroy, uint32_t num): allocator(allocator), destroy(destroy), num(num) {}
482
+ void operator() (T* ptr) {
483
+ if (ptr != nullptr) {
484
+ if (destroy) {
485
+ for (uint32_t i = 0; i < num; ++i) {
486
+ ptr[i].~T();
487
+ }
488
+ }
489
+ allocator.deallocate(ptr, num);
490
+ }
491
+ }
492
+ void set_destroy(bool destroy) { this->destroy = destroy; }
493
+ private:
494
+ A allocator;
495
+ bool destroy;
496
+ uint32_t num;
497
+ };
498
+
499
+ } /* namespace datasketches */
500
+
501
+ #endif