datasketches 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/datasketches/cpc_wrapper.cpp +12 -13
  4. data/ext/datasketches/ext.cpp +1 -1
  5. data/ext/datasketches/ext.h +4 -0
  6. data/ext/datasketches/extconf.rb +1 -1
  7. data/ext/datasketches/fi_wrapper.cpp +6 -8
  8. data/ext/datasketches/hll_wrapper.cpp +13 -14
  9. data/ext/datasketches/kll_wrapper.cpp +28 -76
  10. data/ext/datasketches/theta_wrapper.cpp +27 -41
  11. data/ext/datasketches/vo_wrapper.cpp +4 -6
  12. data/lib/datasketches/version.rb +1 -1
  13. data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
  14. data/vendor/datasketches-cpp/README.md +4 -4
  15. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +7 -0
  16. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +12 -0
  17. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +24 -0
  18. data/vendor/datasketches-cpp/common/test/integration_test.cpp +77 -0
  19. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +9 -1
  20. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +3 -0
  21. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +2 -2
  22. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +28 -19
  23. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +8 -5
  24. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +19 -14
  25. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +2 -2
  26. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +6 -6
  27. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +0 -6
  28. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +3 -3
  29. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -3
  30. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +9 -9
  31. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -0
  32. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +237 -0
  33. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +15 -10
  34. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +40 -28
  35. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +19 -13
  36. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +140 -124
  37. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +15 -12
  38. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
  39. data/vendor/datasketches-cpp/hll/CMakeLists.txt +3 -0
  40. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +32 -57
  41. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +9 -8
  42. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +2 -2
  43. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +34 -48
  44. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +10 -10
  45. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +45 -77
  46. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +11 -12
  47. data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +2 -2
  48. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +2 -2
  49. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +15 -14
  50. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +1 -1
  51. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +10 -21
  52. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +2 -3
  53. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +10 -21
  54. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +2 -3
  55. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +28 -55
  56. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +8 -8
  57. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +9 -11
  58. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +2 -1
  59. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +34 -31
  60. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +3 -28
  61. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +1 -1
  62. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +1 -1
  63. data/vendor/datasketches-cpp/hll/include/hll.hpp +6 -34
  64. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +7 -7
  65. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +2 -2
  66. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +3 -3
  67. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +2 -2
  68. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +46 -50
  69. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +1 -1
  70. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +3 -3
  71. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +10 -3
  72. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +93 -75
  73. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +11 -10
  74. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +45 -42
  75. data/vendor/datasketches-cpp/python/CMakeLists.txt +2 -0
  76. data/vendor/datasketches-cpp/python/README.md +6 -3
  77. data/vendor/datasketches-cpp/python/src/datasketches.cpp +2 -0
  78. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -2
  79. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +3 -1
  80. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +246 -0
  81. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +36 -26
  82. data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -1
  83. data/vendor/datasketches-cpp/python/tests/kll_test.py +3 -3
  84. data/vendor/datasketches-cpp/python/tests/req_test.py +126 -0
  85. data/vendor/datasketches-cpp/python/tests/theta_test.py +28 -3
  86. data/vendor/datasketches-cpp/req/CMakeLists.txt +60 -0
  87. data/vendor/datasketches-cpp/{tuple/include/theta_a_not_b_experimental_impl.hpp → req/include/req_common.hpp} +17 -8
  88. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +137 -0
  89. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +501 -0
  90. data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +69 -0
  91. data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +60 -0
  92. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +395 -0
  93. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +810 -0
  94. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +43 -0
  95. data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
  96. data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
  97. data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
  98. data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
  99. data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
  100. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +128 -0
  101. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +494 -0
  102. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +10 -9
  103. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +82 -70
  104. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +5 -5
  105. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +7 -7
  106. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -0
  107. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +96 -0
  108. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -31
  109. data/vendor/datasketches-cpp/setup.py +5 -3
  110. data/vendor/datasketches-cpp/theta/CMakeLists.txt +30 -3
  111. data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_sampled_sets.hpp +2 -1
  112. data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_theta_sketched_sets.hpp +1 -1
  113. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +12 -29
  114. data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +5 -46
  115. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_comparators.hpp +0 -0
  116. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_constants.hpp +2 -0
  117. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_helpers.hpp +0 -0
  118. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +22 -29
  119. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base.hpp +0 -0
  120. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base_impl.hpp +0 -0
  121. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +8 -90
  122. data/vendor/datasketches-cpp/{tuple/test/theta_union_experimental_test.cpp → theta/include/theta_jaccard_similarity.hpp} +11 -18
  123. data/vendor/datasketches-cpp/{tuple/include/jaccard_similarity.hpp → theta/include/theta_jaccard_similarity_base.hpp} +6 -22
  124. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base.hpp +0 -0
  125. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base_impl.hpp +5 -0
  126. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +132 -266
  127. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +200 -650
  128. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +27 -60
  129. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base.hpp +1 -1
  130. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base_impl.hpp +5 -0
  131. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +13 -69
  132. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base.hpp +3 -19
  133. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base_impl.hpp +6 -1
  134. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  135. data/vendor/datasketches-cpp/{tuple → theta}/test/theta_jaccard_similarity_test.cpp +2 -3
  136. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +37 -234
  137. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +3 -35
  138. data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +38 -0
  139. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +28 -13
  140. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +6 -6
  141. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +1 -6
  142. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -4
  143. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -4
  144. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +2 -1
  145. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +2 -2
  146. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +1 -4
  147. metadata +43 -34
  148. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +0 -53
  149. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +0 -78
  150. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +0 -43
  151. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +0 -393
  152. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +0 -481
  153. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +0 -88
  154. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +0 -47
  155. data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +0 -250
  156. data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
  157. data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
  158. data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
  159. data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +0 -224
  160. data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +0 -247
@@ -20,35 +20,23 @@
20
20
  #ifndef THETA_SKETCH_IMPL_HPP_
21
21
  #define THETA_SKETCH_IMPL_HPP_
22
22
 
23
- #include <algorithm>
24
- #include <cmath>
25
- #include <memory>
26
- #include <functional>
27
- #include <istream>
28
- #include <ostream>
29
23
  #include <sstream>
24
+ #include <vector>
30
25
 
31
- #include "MurmurHash3.h"
32
26
  #include "serde.hpp"
33
27
  #include "binomial_bounds.hpp"
34
- #include "memory_operations.hpp"
28
+ #include "theta_helpers.hpp"
35
29
 
36
30
  namespace datasketches {
37
31
 
38
- /*
39
- * author Alexander Saydakov
40
- * author Lee Rhodes
41
- * author Kevin Lang
42
- */
43
-
44
32
  template<typename A>
45
- theta_sketch_alloc<A>::theta_sketch_alloc(bool is_empty, uint64_t theta):
46
- is_empty_(is_empty), theta_(theta)
47
- {}
33
+ bool theta_sketch_alloc<A>::is_estimation_mode() const {
34
+ return get_theta64() < theta_constants::MAX_THETA && !is_empty();
35
+ }
48
36
 
49
37
  template<typename A>
50
- bool theta_sketch_alloc<A>::is_empty() const {
51
- return is_empty_;
38
+ double theta_sketch_alloc<A>::get_theta() const {
39
+ return static_cast<double>(get_theta64()) / theta_constants::MAX_THETA;
52
40
  }
53
41
 
54
42
  template<typename A>
@@ -69,182 +57,47 @@ double theta_sketch_alloc<A>::get_upper_bound(uint8_t num_std_devs) const {
69
57
  }
70
58
 
71
59
  template<typename A>
72
- bool theta_sketch_alloc<A>::is_estimation_mode() const {
73
- return theta_ < MAX_THETA && !is_empty_;
74
- }
75
-
76
- template<typename A>
77
- double theta_sketch_alloc<A>::get_theta() const {
78
- return (double) theta_ / MAX_THETA;
79
- }
80
-
81
- template<typename A>
82
- uint64_t theta_sketch_alloc<A>::get_theta64() const {
83
- return theta_;
84
- }
85
-
86
- template<typename A>
87
- typename theta_sketch_alloc<A>::unique_ptr theta_sketch_alloc<A>::deserialize(std::istream& is, uint64_t seed) {
88
- uint8_t preamble_longs;
89
- is.read((char*)&preamble_longs, sizeof(preamble_longs));
90
- uint8_t serial_version;
91
- is.read((char*)&serial_version, sizeof(serial_version));
92
- uint8_t type;
93
- is.read((char*)&type, sizeof(type));
94
- uint8_t lg_nom_size;
95
- is.read((char*)&lg_nom_size, sizeof(lg_nom_size));
96
- uint8_t lg_cur_size;
97
- is.read((char*)&lg_cur_size, sizeof(lg_cur_size));
98
- uint8_t flags_byte;
99
- is.read((char*)&flags_byte, sizeof(flags_byte));
100
- uint16_t seed_hash;
101
- is.read((char*)&seed_hash, sizeof(seed_hash));
102
-
103
- check_serial_version(serial_version, SERIAL_VERSION);
104
-
105
- if (type == update_theta_sketch_alloc<A>::SKETCH_TYPE) {
106
- check_seed_hash(seed_hash, get_seed_hash(seed));
107
- typename update_theta_sketch_alloc<A>::resize_factor rf = static_cast<typename update_theta_sketch_alloc<A>::resize_factor>(preamble_longs >> 6);
108
- typedef typename std::allocator_traits<A>::template rebind_alloc<update_theta_sketch_alloc<A>> AU;
109
- return unique_ptr(
110
- static_cast<theta_sketch_alloc<A>*>(new (AU().allocate(1)) update_theta_sketch_alloc<A>(update_theta_sketch_alloc<A>::internal_deserialize(is, rf, lg_cur_size, lg_nom_size, flags_byte, seed))),
111
- [](theta_sketch_alloc<A>* ptr) {
112
- ptr->~theta_sketch_alloc();
113
- AU().deallocate(static_cast<update_theta_sketch_alloc<A>*>(ptr), 1);
114
- }
115
- );
116
- } else if (type == compact_theta_sketch_alloc<A>::SKETCH_TYPE) {
117
- const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY);
118
- if (!is_empty) check_seed_hash(seed_hash, get_seed_hash(seed));
119
- typedef typename std::allocator_traits<A>::template rebind_alloc<compact_theta_sketch_alloc<A>> AC;
120
- return unique_ptr(
121
- static_cast<theta_sketch_alloc<A>*>(new (AC().allocate(1)) compact_theta_sketch_alloc<A>(compact_theta_sketch_alloc<A>::internal_deserialize(is, preamble_longs, flags_byte, seed_hash))),
122
- [](theta_sketch_alloc<A>* ptr) {
123
- ptr->~theta_sketch_alloc();
124
- AC().deallocate(static_cast<compact_theta_sketch_alloc<A>*>(ptr), 1);
125
- }
126
- );
127
- }
128
- throw std::invalid_argument("unsupported sketch type " + std::to_string((int) type));
129
- }
130
-
131
- template<typename A>
132
- typename theta_sketch_alloc<A>::unique_ptr theta_sketch_alloc<A>::deserialize(const void* bytes, size_t size, uint64_t seed) {
133
- ensure_minimum_memory(size, static_cast<size_t>(8));
134
- const char* ptr = static_cast<const char*>(bytes);
135
- uint8_t preamble_longs;
136
- ptr += copy_from_mem(ptr, &preamble_longs, sizeof(preamble_longs));
137
- uint8_t serial_version;
138
- ptr += copy_from_mem(ptr, &serial_version, sizeof(serial_version));
139
- uint8_t type;
140
- ptr += copy_from_mem(ptr, &type, sizeof(type));
141
- uint8_t lg_nom_size;
142
- ptr += copy_from_mem(ptr, &lg_nom_size, sizeof(lg_nom_size));
143
- uint8_t lg_cur_size;
144
- ptr += copy_from_mem(ptr, &lg_cur_size, sizeof(lg_cur_size));
145
- uint8_t flags_byte;
146
- ptr += copy_from_mem(ptr, &flags_byte, sizeof(flags_byte));
147
- uint16_t seed_hash;
148
- ptr += copy_from_mem(ptr, &seed_hash, sizeof(seed_hash));
149
-
150
- check_serial_version(serial_version, SERIAL_VERSION);
151
-
152
- if (type == update_theta_sketch_alloc<A>::SKETCH_TYPE) {
153
- check_seed_hash(seed_hash, get_seed_hash(seed));
154
- typename update_theta_sketch_alloc<A>::resize_factor rf = static_cast<typename update_theta_sketch_alloc<A>::resize_factor>(preamble_longs >> 6);
155
- typedef typename std::allocator_traits<A>::template rebind_alloc<update_theta_sketch_alloc<A>> AU;
156
- return unique_ptr(
157
- static_cast<theta_sketch_alloc<A>*>(new (AU().allocate(1)) update_theta_sketch_alloc<A>(
158
- update_theta_sketch_alloc<A>::internal_deserialize(ptr, size - (ptr - static_cast<const char*>(bytes)), rf, lg_cur_size, lg_nom_size, flags_byte, seed))
159
- ),
160
- [](theta_sketch_alloc<A>* ptr) {
161
- ptr->~theta_sketch_alloc();
162
- AU().deallocate(static_cast<update_theta_sketch_alloc<A>*>(ptr), 1);
163
- }
164
- );
165
- } else if (type == compact_theta_sketch_alloc<A>::SKETCH_TYPE) {
166
- const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY);
167
- if (!is_empty) check_seed_hash(seed_hash, get_seed_hash(seed));
168
- typedef typename std::allocator_traits<A>::template rebind_alloc<compact_theta_sketch_alloc<A>> AC;
169
- return unique_ptr(
170
- static_cast<theta_sketch_alloc<A>*>(new (AC().allocate(1)) compact_theta_sketch_alloc<A>(
171
- compact_theta_sketch_alloc<A>::internal_deserialize(ptr, size - (ptr - static_cast<const char*>(bytes)), preamble_longs, flags_byte, seed_hash))
172
- ),
173
- [](theta_sketch_alloc<A>* ptr) {
174
- ptr->~theta_sketch_alloc();
175
- AC().deallocate(static_cast<compact_theta_sketch_alloc<A>*>(ptr), 1);
176
- }
177
- );
178
- }
179
- throw std::invalid_argument("unsupported sketch type " + std::to_string((int) type));
180
- }
181
-
182
- template<typename A>
183
- uint16_t theta_sketch_alloc<A>::get_seed_hash(uint64_t seed) {
184
- HashState hashes;
185
- MurmurHash3_x64_128(&seed, sizeof(seed), 0, hashes);
186
- return hashes.h1;
187
- }
188
-
189
- template<typename A>
190
- void theta_sketch_alloc<A>::check_sketch_type(uint8_t actual, uint8_t expected) {
191
- if (actual != expected) {
192
- throw std::invalid_argument("Sketch type mismatch: expected " + std::to_string((int)expected) + ", actual " + std::to_string((int)actual));
193
- }
194
- }
195
-
196
- template<typename A>
197
- void theta_sketch_alloc<A>::check_serial_version(uint8_t actual, uint8_t expected) {
198
- if (actual != expected) {
199
- throw std::invalid_argument("Sketch serial version mismatch: expected " + std::to_string((int)expected) + ", actual " + std::to_string((int)actual));
200
- }
201
- }
202
-
203
- template<typename A>
204
- void theta_sketch_alloc<A>::check_seed_hash(uint16_t actual, uint16_t expected) {
205
- if (actual != expected) {
206
- throw std::invalid_argument("Sketch seed hash mismatch: expected " + std::to_string(expected) + ", actual " + std::to_string(actual));
60
+ string<A> theta_sketch_alloc<A>::to_string(bool detail) const {
61
+ ostrstream os;
62
+ os << "### Theta sketch summary:" << std::endl;
63
+ os << " num retained entries : " << get_num_retained() << std::endl;
64
+ os << " seed hash : " << get_seed_hash() << std::endl;
65
+ os << " empty? : " << (is_empty() ? "true" : "false") << std::endl;
66
+ os << " ordered? : " << (is_ordered() ? "true" : "false") << std::endl;
67
+ os << " estimation mode? : " << (is_estimation_mode() ? "true" : "false") << std::endl;
68
+ os << " theta (fraction) : " << get_theta() << std::endl;
69
+ os << " theta (raw 64-bit) : " << get_theta64() << std::endl;
70
+ os << " estimate : " << this->get_estimate() << std::endl;
71
+ os << " lower bound 95% conf : " << this->get_lower_bound(2) << std::endl;
72
+ os << " upper bound 95% conf : " << this->get_upper_bound(2) << std::endl;
73
+ print_specifics(os);
74
+ os << "### End sketch summary" << std::endl;
75
+ if (detail) {
76
+ os << "### Retained entries" << std::endl;
77
+ for (const auto& hash: *this) {
78
+ os << hash << std::endl;
79
+ }
80
+ os << "### End retained entries" << std::endl;
207
81
  }
82
+ return os.str();
208
83
  }
209
84
 
210
85
  // update sketch
211
86
 
212
87
  template<typename A>
213
- update_theta_sketch_alloc<A>::update_theta_sketch_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed):
214
- theta_sketch_alloc<A>(true, theta_sketch_alloc<A>::MAX_THETA),
215
- lg_cur_size_(lg_cur_size),
216
- lg_nom_size_(lg_nom_size),
217
- keys_(1 << lg_cur_size_, 0),
218
- num_keys_(0),
219
- rf_(rf),
220
- p_(p),
221
- seed_(seed),
222
- capacity_(get_capacity(lg_cur_size, lg_nom_size))
223
- {
224
- if (p < 1) this->theta_ *= p;
225
- }
226
-
227
- template<typename A>
228
- update_theta_sketch_alloc<A>::update_theta_sketch_alloc(bool is_empty, uint64_t theta, uint8_t lg_cur_size, uint8_t lg_nom_size, vector_u64<A>&& keys, uint32_t num_keys, resize_factor rf, float p, uint64_t seed):
229
- theta_sketch_alloc<A>(is_empty, theta),
230
- lg_cur_size_(lg_cur_size),
231
- lg_nom_size_(lg_nom_size),
232
- keys_(std::move(keys)),
233
- num_keys_(num_keys),
234
- rf_(rf),
235
- p_(p),
236
- seed_(seed),
237
- capacity_(get_capacity(lg_cur_size, lg_nom_size))
88
+ update_theta_sketch_alloc<A>::update_theta_sketch_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf,
89
+ uint64_t theta, uint64_t seed, const A& allocator):
90
+ table_(lg_cur_size, lg_nom_size, rf, theta, seed, allocator)
238
91
  {}
239
92
 
240
93
  template<typename A>
241
- uint32_t update_theta_sketch_alloc<A>::get_num_retained() const {
242
- return num_keys_;
94
+ A update_theta_sketch_alloc<A>::get_allocator() const {
95
+ return table_.allocator_;
243
96
  }
244
97
 
245
98
  template<typename A>
246
- uint16_t update_theta_sketch_alloc<A>::get_seed_hash() const {
247
- return theta_sketch_alloc<A>::get_seed_hash(seed_);
99
+ bool update_theta_sketch_alloc<A>::is_empty() const {
100
+ return table_.is_empty_;
248
101
  }
249
102
 
250
103
  template<typename A>
@@ -253,169 +106,28 @@ bool update_theta_sketch_alloc<A>::is_ordered() const {
253
106
  }
254
107
 
255
108
  template<typename A>
256
- string<A> update_theta_sketch_alloc<A>::to_string(bool print_items) const {
257
- std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
258
- os << "### Update Theta sketch summary:" << std::endl;
259
- os << " lg nominal size : " << (int) lg_nom_size_ << std::endl;
260
- os << " lg current size : " << (int) lg_cur_size_ << std::endl;
261
- os << " num retained keys : " << num_keys_ << std::endl;
262
- os << " resize factor : " << (1 << rf_) << std::endl;
263
- os << " sampling probability : " << p_ << std::endl;
264
- os << " seed hash : " << this->get_seed_hash() << std::endl;
265
- os << " empty? : " << (this->is_empty() ? "true" : "false") << std::endl;
266
- os << " ordered? : " << (this->is_ordered() ? "true" : "false") << std::endl;
267
- os << " estimation mode? : " << (this->is_estimation_mode() ? "true" : "false") << std::endl;
268
- os << " theta (fraction) : " << this->get_theta() << std::endl;
269
- os << " theta (raw 64-bit) : " << this->theta_ << std::endl;
270
- os << " estimate : " << this->get_estimate() << std::endl;
271
- os << " lower bound 95% conf : " << this->get_lower_bound(2) << std::endl;
272
- os << " upper bound 95% conf : " << this->get_upper_bound(2) << std::endl;
273
- os << "### End sketch summary" << std::endl;
274
- if (print_items) {
275
- os << "### Retained keys" << std::endl;
276
- for (auto key: *this) os << " " << key << std::endl;
277
- os << "### End retained keys" << std::endl;
278
- }
279
- return os.str();
280
- }
281
-
282
- template<typename A>
283
- void update_theta_sketch_alloc<A>::serialize(std::ostream& os) const {
284
- const uint8_t preamble_longs_and_rf = 3 | (rf_ << 6);
285
- os.write((char*)&preamble_longs_and_rf, sizeof(preamble_longs_and_rf));
286
- const uint8_t serial_version = theta_sketch_alloc<A>::SERIAL_VERSION;
287
- os.write((char*)&serial_version, sizeof(serial_version));
288
- const uint8_t type = SKETCH_TYPE;
289
- os.write((char*)&type, sizeof(type));
290
- os.write((char*)&lg_nom_size_, sizeof(lg_nom_size_));
291
- os.write((char*)&lg_cur_size_, sizeof(lg_cur_size_));
292
- const uint8_t flags_byte(
293
- (this->is_empty() ? 1 << theta_sketch_alloc<A>::flags::IS_EMPTY : 0)
294
- );
295
- os.write((char*)&flags_byte, sizeof(flags_byte));
296
- const uint16_t seed_hash = get_seed_hash();
297
- os.write((char*)&seed_hash, sizeof(seed_hash));
298
- os.write((char*)&num_keys_, sizeof(num_keys_));
299
- os.write((char*)&p_, sizeof(p_));
300
- os.write((char*)&(this->theta_), sizeof(uint64_t));
301
- os.write((char*)keys_.data(), sizeof(uint64_t) * keys_.size());
302
- }
303
-
304
- template<typename A>
305
- vector_u8<A> update_theta_sketch_alloc<A>::serialize(unsigned header_size_bytes) const {
306
- const uint8_t preamble_longs = 3;
307
- const size_t size = header_size_bytes + sizeof(uint64_t) * preamble_longs + sizeof(uint64_t) * keys_.size();
308
- vector_u8<A> bytes(size);
309
- uint8_t* ptr = bytes.data() + header_size_bytes;
310
-
311
- const uint8_t preamble_longs_and_rf = preamble_longs | (rf_ << 6);
312
- ptr += copy_to_mem(&preamble_longs_and_rf, ptr, sizeof(preamble_longs_and_rf));
313
- const uint8_t serial_version = theta_sketch_alloc<A>::SERIAL_VERSION;
314
- ptr += copy_to_mem(&serial_version, ptr, sizeof(serial_version));
315
- const uint8_t type = SKETCH_TYPE;
316
- ptr += copy_to_mem(&type, ptr, sizeof(type));
317
- ptr += copy_to_mem(&lg_nom_size_, ptr, sizeof(lg_nom_size_));
318
- ptr += copy_to_mem(&lg_cur_size_, ptr, sizeof(lg_cur_size_));
319
- const uint8_t flags_byte(
320
- (this->is_empty() ? 1 << theta_sketch_alloc<A>::flags::IS_EMPTY : 0)
321
- );
322
- ptr += copy_to_mem(&flags_byte, ptr, sizeof(flags_byte));
323
- const uint16_t seed_hash = get_seed_hash();
324
- ptr += copy_to_mem(&seed_hash, ptr, sizeof(seed_hash));
325
- ptr += copy_to_mem(&num_keys_, ptr, sizeof(num_keys_));
326
- ptr += copy_to_mem(&p_, ptr, sizeof(p_));
327
- ptr += copy_to_mem(&(this->theta_), ptr, sizeof(uint64_t));
328
- ptr += copy_to_mem(keys_.data(), ptr, sizeof(uint64_t) * keys_.size());
329
-
330
- return bytes;
331
- }
332
-
333
- template<typename A>
334
- update_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::deserialize(std::istream& is, uint64_t seed) {
335
- uint8_t preamble_longs;
336
- is.read((char*)&preamble_longs, sizeof(preamble_longs));
337
- resize_factor rf = static_cast<resize_factor>(preamble_longs >> 6);
338
- preamble_longs &= 0x3f; // remove resize factor
339
- uint8_t serial_version;
340
- is.read((char*)&serial_version, sizeof(serial_version));
341
- uint8_t type;
342
- is.read((char*)&type, sizeof(type));
343
- uint8_t lg_nom_size;
344
- is.read((char*)&lg_nom_size, sizeof(lg_nom_size));
345
- uint8_t lg_cur_size;
346
- is.read((char*)&lg_cur_size, sizeof(lg_cur_size));
347
- uint8_t flags_byte;
348
- is.read((char*)&flags_byte, sizeof(flags_byte));
349
- uint16_t seed_hash;
350
- is.read((char*)&seed_hash, sizeof(seed_hash));
351
- theta_sketch_alloc<A>::check_sketch_type(type, SKETCH_TYPE);
352
- theta_sketch_alloc<A>::check_serial_version(serial_version, theta_sketch_alloc<A>::SERIAL_VERSION);
353
- theta_sketch_alloc<A>::check_seed_hash(seed_hash, theta_sketch_alloc<A>::get_seed_hash(seed));
354
- return internal_deserialize(is, rf, lg_cur_size, lg_nom_size, flags_byte, seed);
109
+ uint64_t update_theta_sketch_alloc<A>::get_theta64() const {
110
+ return table_.theta_;
355
111
  }
356
112
 
357
113
  template<typename A>
358
- update_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::internal_deserialize(std::istream& is, resize_factor rf, uint8_t lg_cur_size, uint8_t lg_nom_size, uint8_t flags_byte, uint64_t seed) {
359
- uint32_t num_keys;
360
- is.read((char*)&num_keys, sizeof(num_keys));
361
- float p;
362
- is.read((char*)&p, sizeof(p));
363
- uint64_t theta;
364
- is.read((char*)&theta, sizeof(theta));
365
- vector_u64<A> keys(1 << lg_cur_size);
366
- is.read((char*)keys.data(), sizeof(uint64_t) * keys.size());
367
- const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY);
368
- if (!is.good()) throw std::runtime_error("error reading from std::istream");
369
- return update_theta_sketch_alloc<A>(is_empty, theta, lg_cur_size, lg_nom_size, std::move(keys), num_keys, rf, p, seed);
114
+ uint32_t update_theta_sketch_alloc<A>::get_num_retained() const {
115
+ return table_.num_entries_;
370
116
  }
371
117
 
372
118
  template<typename A>
373
- update_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::deserialize(const void* bytes, size_t size, uint64_t seed) {
374
- ensure_minimum_memory(size, 8);
375
- const char* ptr = static_cast<const char*>(bytes);
376
- uint8_t preamble_longs;
377
- ptr += copy_from_mem(ptr, &preamble_longs, sizeof(preamble_longs));
378
- resize_factor rf = static_cast<resize_factor>(preamble_longs >> 6);
379
- preamble_longs &= 0x3f; // remove resize factor
380
- uint8_t serial_version;
381
- ptr += copy_from_mem(ptr, &serial_version, sizeof(serial_version));
382
- uint8_t type;
383
- ptr += copy_from_mem(ptr, &type, sizeof(type));
384
- uint8_t lg_nom_size;
385
- ptr += copy_from_mem(ptr, &lg_nom_size, sizeof(lg_nom_size));
386
- uint8_t lg_cur_size;
387
- ptr += copy_from_mem(ptr, &lg_cur_size, sizeof(lg_cur_size));
388
- uint8_t flags_byte;
389
- ptr += copy_from_mem(ptr, &flags_byte, sizeof(flags_byte));
390
- uint16_t seed_hash;
391
- ptr += copy_from_mem(ptr, &seed_hash, sizeof(seed_hash));
392
- theta_sketch_alloc<A>::check_sketch_type(type, SKETCH_TYPE);
393
- theta_sketch_alloc<A>::check_serial_version(serial_version, theta_sketch_alloc<A>::SERIAL_VERSION);
394
- theta_sketch_alloc<A>::check_seed_hash(seed_hash, theta_sketch_alloc<A>::get_seed_hash(seed));
395
- return internal_deserialize(ptr, size - (ptr - static_cast<const char*>(bytes)), rf, lg_cur_size, lg_nom_size, flags_byte, seed);
119
+ uint16_t update_theta_sketch_alloc<A>::get_seed_hash() const {
120
+ return compute_seed_hash(table_.seed_);
396
121
  }
397
122
 
398
123
  template<typename A>
399
- update_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::internal_deserialize(const void* bytes, size_t size, resize_factor rf, uint8_t lg_cur_size, uint8_t lg_nom_size, uint8_t flags_byte, uint64_t seed) {
400
- const uint32_t table_size = 1 << lg_cur_size;
401
- ensure_minimum_memory(size, 16 + sizeof(uint64_t) * table_size);
402
- const char* ptr = static_cast<const char*>(bytes);
403
- uint32_t num_keys;
404
- ptr += copy_from_mem(ptr, &num_keys, sizeof(num_keys));
405
- float p;
406
- ptr += copy_from_mem(ptr, &p, sizeof(p));
407
- uint64_t theta;
408
- ptr += copy_from_mem(ptr, &theta, sizeof(theta));
409
- vector_u64<A> keys(table_size);
410
- ptr += copy_from_mem(ptr, keys.data(), sizeof(uint64_t) * table_size);
411
- const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY);
412
- return update_theta_sketch_alloc<A>(is_empty, theta, lg_cur_size, lg_nom_size, std::move(keys), num_keys, rf, p, seed);
124
+ uint8_t update_theta_sketch_alloc<A>::get_lg_k() const {
125
+ return table_.lg_nom_size_;
413
126
  }
414
127
 
415
128
  template<typename A>
416
- void update_theta_sketch_alloc<A>::update(const std::string& value) {
417
- if (value.empty()) return;
418
- update(value.c_str(), value.length());
129
+ auto update_theta_sketch_alloc<A>::get_rf() const -> resize_factor {
130
+ return table_.rf_;
419
131
  }
420
132
 
421
133
  template<typename A>
@@ -460,19 +172,7 @@ void update_theta_sketch_alloc<A>::update(int8_t value) {
460
172
 
461
173
  template<typename A>
462
174
  void update_theta_sketch_alloc<A>::update(double value) {
463
- union {
464
- int64_t long_value;
465
- double double_value;
466
- } long_double_union;
467
-
468
- if (value == 0.0) {
469
- long_double_union.double_value = 0.0; // canonicalize -0.0 to 0.0
470
- } else if (std::isnan(value)) {
471
- long_double_union.long_value = 0x7ff8000000000000L; // canonicalize NaN using value from Java's Double.doubleToLongBits()
472
- } else {
473
- long_double_union.double_value = value;
474
- }
475
- update(&long_double_union, sizeof(long_double_union));
175
+ update(canonical_double(value));
476
176
  }
477
177
 
478
178
  template<typename A>
@@ -481,157 +181,116 @@ void update_theta_sketch_alloc<A>::update(float value) {
481
181
  }
482
182
 
483
183
  template<typename A>
484
- void update_theta_sketch_alloc<A>::update(const void* data, unsigned length) {
485
- HashState hashes;
486
- MurmurHash3_x64_128(data, length, seed_, hashes);
487
- const uint64_t hash = hashes.h1 >> 1; // Java implementation does logical shift >>> to make values positive
488
- internal_update(hash);
489
- }
490
-
491
- template<typename A>
492
- compact_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::compact(bool ordered) const {
493
- return compact_theta_sketch_alloc<A>(*this, ordered);
184
+ void update_theta_sketch_alloc<A>::update(const std::string& value) {
185
+ if (value.empty()) return;
186
+ update(value.c_str(), value.length());
494
187
  }
495
188
 
496
189
  template<typename A>
497
- void update_theta_sketch_alloc<A>::internal_update(uint64_t hash) {
498
- this->is_empty_ = false;
499
- if (hash >= this->theta_ || hash == 0) return; // hash == 0 is reserved to mark empty slots in the table
500
- if (hash_search_or_insert(hash, keys_.data(), lg_cur_size_)) {
501
- num_keys_++;
502
- if (num_keys_ > capacity_) {
503
- if (lg_cur_size_ <= lg_nom_size_) {
504
- resize();
505
- } else {
506
- rebuild();
507
- }
508
- }
190
+ void update_theta_sketch_alloc<A>::update(const void* data, size_t length) {
191
+ const uint64_t hash = table_.hash_and_screen(data, length);
192
+ if (hash == 0) return;
193
+ auto result = table_.find(hash);
194
+ if (!result.second) {
195
+ table_.insert(result.first, hash);
509
196
  }
510
197
  }
511
198
 
512
199
  template<typename A>
513
200
  void update_theta_sketch_alloc<A>::trim() {
514
- if (num_keys_ > static_cast<uint32_t>(1 << lg_nom_size_)) rebuild();
201
+ table_.trim();
515
202
  }
516
203
 
517
204
  template<typename A>
518
- void update_theta_sketch_alloc<A>::resize() {
519
- const uint8_t lg_tgt_size = lg_nom_size_ + 1;
520
- const uint8_t factor = std::max(1, std::min(static_cast<int>(rf_), lg_tgt_size - lg_cur_size_));
521
- const uint8_t lg_new_size = lg_cur_size_ + factor;
522
- const uint32_t new_size = 1 << lg_new_size;
523
- vector_u64<A> new_keys(new_size, 0);
524
- for (uint32_t i = 0; i < keys_.size(); i++) {
525
- if (keys_[i] != 0) {
526
- hash_search_or_insert(keys_[i], new_keys.data(), lg_new_size); // TODO hash_insert
527
- }
528
- }
529
- keys_ = std::move(new_keys);
530
- lg_cur_size_ += factor;
531
- capacity_ = get_capacity(lg_cur_size_, lg_nom_size_);
532
- }
533
-
534
- template<typename A>
535
- void update_theta_sketch_alloc<A>::rebuild() {
536
- const uint32_t pivot = (1 << lg_nom_size_) + keys_.size() - num_keys_;
537
- std::nth_element(keys_.begin(), keys_.begin() + pivot, keys_.end());
538
- this->theta_ = keys_[pivot];
539
- vector_u64<A> new_keys(keys_.size(), 0);
540
- num_keys_ = 0;
541
- for (uint32_t i = 0; i < keys_.size(); i++) {
542
- if (keys_[i] != 0 && keys_[i] < this->theta_) {
543
- hash_search_or_insert(keys_[i], new_keys.data(), lg_cur_size_); // TODO hash_insert
544
- num_keys_++;
545
- }
546
- }
547
- keys_ = std::move(new_keys);
205
+ auto update_theta_sketch_alloc<A>::begin() -> iterator {
206
+ return iterator(table_.entries_, 1 << table_.lg_cur_size_, 0);
548
207
  }
549
208
 
550
209
  template<typename A>
551
- uint32_t update_theta_sketch_alloc<A>::get_capacity(uint8_t lg_cur_size, uint8_t lg_nom_size) {
552
- const double fraction = (lg_cur_size <= lg_nom_size) ? RESIZE_THRESHOLD : REBUILD_THRESHOLD;
553
- return std::floor(fraction * (1 << lg_cur_size));
210
+ auto update_theta_sketch_alloc<A>::end() -> iterator {
211
+ return iterator(nullptr, 0, 1 << table_.lg_cur_size_);
554
212
  }
555
213
 
556
214
  template<typename A>
557
- uint32_t update_theta_sketch_alloc<A>::get_stride(uint64_t hash, uint8_t lg_size) {
558
- // odd and independent of index assuming lg_size lowest bits of the hash were used for the index
559
- return (2 * static_cast<uint32_t>((hash >> lg_size) & STRIDE_MASK)) + 1;
215
+ auto update_theta_sketch_alloc<A>::begin() const -> const_iterator {
216
+ return const_iterator(table_.entries_, 1 << table_.lg_cur_size_, 0);
560
217
  }
561
218
 
562
219
  template<typename A>
563
- bool update_theta_sketch_alloc<A>::hash_search_or_insert(uint64_t hash, uint64_t* table, uint8_t lg_size) {
564
- const uint32_t mask = (1 << lg_size) - 1;
565
- const uint32_t stride = get_stride(hash, lg_size);
566
- uint32_t cur_probe = static_cast<uint32_t>(hash) & mask;
220
+ auto update_theta_sketch_alloc<A>::end() const -> const_iterator {
221
+ return const_iterator(nullptr, 0, 1 << table_.lg_cur_size_);
222
+ }
567
223
 
568
- // search for duplicate or zero
569
- const uint32_t loop_index = cur_probe;
570
- do {
571
- const uint64_t value = table[cur_probe];
572
- if (value == 0) {
573
- table[cur_probe] = hash; // insert value
574
- return true;
575
- } else if (value == hash) {
576
- return false; // found a duplicate
577
- }
578
- cur_probe = (cur_probe + stride) & mask;
579
- } while (cur_probe != loop_index);
580
- throw std::logic_error("key not found and no empty slots!");
581
- }
582
-
583
- template<typename A>
584
- bool update_theta_sketch_alloc<A>::hash_search(uint64_t hash, const uint64_t* table, uint8_t lg_size) {
585
- const uint32_t mask = (1 << lg_size) - 1;
586
- const uint32_t stride = update_theta_sketch_alloc<A>::get_stride(hash, lg_size);
587
- uint32_t cur_probe = static_cast<uint32_t>(hash) & mask;
588
- const uint32_t loop_index = cur_probe;
589
- do {
590
- const uint64_t value = table[cur_probe];
591
- if (value == 0) {
592
- return false;
593
- } else if (value == hash) {
594
- return true;
595
- }
596
- cur_probe = (cur_probe + stride) & mask;
597
- } while (cur_probe != loop_index);
598
- throw std::logic_error("key not found and search wrapped");
224
+ template<typename A>
225
+ compact_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::compact(bool ordered) const {
226
+ return compact_theta_sketch_alloc<A>(*this, ordered);
599
227
  }
600
228
 
601
229
  template<typename A>
602
- typename theta_sketch_alloc<A>::const_iterator update_theta_sketch_alloc<A>::begin() const {
603
- return typename theta_sketch_alloc<A>::const_iterator(keys_.data(), keys_.size(), 0);
230
+ void update_theta_sketch_alloc<A>::print_specifics(ostrstream& os) const {
231
+ os << " lg nominal size : " << static_cast<int>(table_.lg_nom_size_) << std::endl;
232
+ os << " lg current size : " << static_cast<int>(table_.lg_cur_size_) << std::endl;
233
+ os << " resize factor : " << (1 << table_.rf_) << std::endl;
604
234
  }
605
235
 
236
+ // builder
237
+
606
238
  template<typename A>
607
- typename theta_sketch_alloc<A>::const_iterator update_theta_sketch_alloc<A>::end() const {
608
- return typename theta_sketch_alloc<A>::const_iterator(keys_.data(), keys_.size(), keys_.size());
239
+ update_theta_sketch_alloc<A>::builder::builder(const A& allocator): theta_base_builder<builder, A>(allocator) {}
240
+
241
+ template<typename A>
242
+ update_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::builder::build() const {
243
+ return update_theta_sketch_alloc(this->starting_lg_size(), this->lg_k_, this->rf_, this->starting_theta(), this->seed_, this->allocator_);
609
244
  }
610
245
 
611
246
  // compact sketch
612
247
 
613
248
  template<typename A>
614
- compact_theta_sketch_alloc<A>::compact_theta_sketch_alloc(bool is_empty, uint64_t theta, vector_u64<A>&& keys, uint16_t seed_hash, bool is_ordered):
615
- theta_sketch_alloc<A>(is_empty, theta),
616
- keys_(std::move(keys)),
249
+ compact_theta_sketch_alloc<A>::compact_theta_sketch_alloc(const Base& other, bool ordered):
250
+ is_empty_(other.is_empty()),
251
+ is_ordered_(other.is_ordered() || ordered),
252
+ seed_hash_(other.get_seed_hash()),
253
+ theta_(other.get_theta64()),
254
+ entries_(other.get_allocator())
255
+ {
256
+ entries_.reserve(other.get_num_retained());
257
+ std::copy(other.begin(), other.end(), std::back_inserter(entries_));
258
+ if (ordered && !other.is_ordered()) std::sort(entries_.begin(), entries_.end());
259
+ }
260
+
261
+ template<typename A>
262
+ compact_theta_sketch_alloc<A>::compact_theta_sketch_alloc(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta,
263
+ std::vector<uint64_t, A>&& entries):
264
+ is_empty_(is_empty),
265
+ is_ordered_(is_ordered),
617
266
  seed_hash_(seed_hash),
618
- is_ordered_(is_ordered)
267
+ theta_(theta),
268
+ entries_(std::move(entries))
619
269
  {}
620
270
 
621
271
  template<typename A>
622
- compact_theta_sketch_alloc<A>::compact_theta_sketch_alloc(const theta_sketch_alloc<A>& other, bool ordered):
623
- theta_sketch_alloc<A>(other),
624
- keys_(other.get_num_retained()),
625
- seed_hash_(other.get_seed_hash()),
626
- is_ordered_(other.is_ordered() || ordered)
627
- {
628
- std::copy(other.begin(), other.end(), keys_.begin());
629
- if (ordered && !other.is_ordered()) std::sort(keys_.begin(), keys_.end());
272
+ A compact_theta_sketch_alloc<A>::get_allocator() const {
273
+ return entries_.get_allocator();
274
+ }
275
+
276
+ template<typename A>
277
+ bool compact_theta_sketch_alloc<A>::is_empty() const {
278
+ return is_empty_;
279
+ }
280
+
281
+ template<typename A>
282
+ bool compact_theta_sketch_alloc<A>::is_ordered() const {
283
+ return is_ordered_;
284
+ }
285
+
286
+ template<typename A>
287
+ uint64_t compact_theta_sketch_alloc<A>::get_theta64() const {
288
+ return theta_;
630
289
  }
631
290
 
632
291
  template<typename A>
633
292
  uint32_t compact_theta_sketch_alloc<A>::get_num_retained() const {
634
- return keys_.size();
293
+ return entries_.size();
635
294
  }
636
295
 
637
296
  template<typename A>
@@ -640,158 +299,148 @@ uint16_t compact_theta_sketch_alloc<A>::get_seed_hash() const {
640
299
  }
641
300
 
642
301
  template<typename A>
643
- bool compact_theta_sketch_alloc<A>::is_ordered() const {
644
- return is_ordered_;
302
+ auto compact_theta_sketch_alloc<A>::begin() -> iterator {
303
+ return iterator(entries_.data(), entries_.size(), 0);
645
304
  }
646
305
 
647
306
  template<typename A>
648
- string<A> compact_theta_sketch_alloc<A>::to_string(bool print_items) const {
649
- std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
650
- os << "### Compact Theta sketch summary:" << std::endl;
651
- os << " num retained keys : " << keys_.size() << std::endl;
652
- os << " seed hash : " << this->get_seed_hash() << std::endl;
653
- os << " empty? : " << (this->is_empty() ? "true" : "false") << std::endl;
654
- os << " ordered? : " << (this->is_ordered() ? "true" : "false") << std::endl;
655
- os << " estimation mode? : " << (this->is_estimation_mode() ? "true" : "false") << std::endl;
656
- os << " theta (fraction) : " << this->get_theta() << std::endl;
657
- os << " theta (raw 64-bit) : " << this->theta_ << std::endl;
658
- os << " estimate : " << this->get_estimate() << std::endl;
659
- os << " lower bound 95% conf : " << this->get_lower_bound(2) << std::endl;
660
- os << " upper bound 95% conf : " << this->get_upper_bound(2) << std::endl;
661
- os << "### End sketch summary" << std::endl;
662
- if (print_items) {
663
- os << "### Retained keys" << std::endl;
664
- for (auto key: *this) os << " " << key << std::endl;
665
- os << "### End retained keys" << std::endl;
666
- }
667
- return os.str();
307
+ auto compact_theta_sketch_alloc<A>::end() -> iterator {
308
+ return iterator(nullptr, 0, entries_.size());
668
309
  }
669
310
 
311
+ template<typename A>
312
+ auto compact_theta_sketch_alloc<A>::begin() const -> const_iterator {
313
+ return const_iterator(entries_.data(), entries_.size(), 0);
314
+ }
315
+
316
+ template<typename A>
317
+ auto compact_theta_sketch_alloc<A>::end() const -> const_iterator {
318
+ return const_iterator(nullptr, 0, entries_.size());
319
+ }
320
+
321
+ template<typename A>
322
+ void compact_theta_sketch_alloc<A>::print_specifics(ostrstream&) const {}
323
+
670
324
  template<typename A>
671
325
  void compact_theta_sketch_alloc<A>::serialize(std::ostream& os) const {
672
- const bool is_single_item = keys_.size() == 1 && !this->is_estimation_mode();
326
+ const bool is_single_item = entries_.size() == 1 && !this->is_estimation_mode();
673
327
  const uint8_t preamble_longs = this->is_empty() || is_single_item ? 1 : this->is_estimation_mode() ? 3 : 2;
674
328
  os.write(reinterpret_cast<const char*>(&preamble_longs), sizeof(preamble_longs));
675
- const uint8_t serial_version = theta_sketch_alloc<A>::SERIAL_VERSION;
329
+ const uint8_t serial_version = SERIAL_VERSION;
676
330
  os.write(reinterpret_cast<const char*>(&serial_version), sizeof(serial_version));
677
331
  const uint8_t type = SKETCH_TYPE;
678
332
  os.write(reinterpret_cast<const char*>(&type), sizeof(type));
679
333
  const uint16_t unused16 = 0;
680
334
  os.write(reinterpret_cast<const char*>(&unused16), sizeof(unused16));
681
335
  const uint8_t flags_byte(
682
- (1 << theta_sketch_alloc<A>::flags::IS_COMPACT) |
683
- (1 << theta_sketch_alloc<A>::flags::IS_READ_ONLY) |
684
- (this->is_empty() ? 1 << theta_sketch_alloc<A>::flags::IS_EMPTY : 0) |
685
- (this->is_ordered() ? 1 << theta_sketch_alloc<A>::flags::IS_ORDERED : 0)
336
+ (1 << flags::IS_COMPACT) |
337
+ (1 << flags::IS_READ_ONLY) |
338
+ (this->is_empty() ? 1 << flags::IS_EMPTY : 0) |
339
+ (this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
686
340
  );
687
341
  os.write(reinterpret_cast<const char*>(&flags_byte), sizeof(flags_byte));
688
342
  const uint16_t seed_hash = get_seed_hash();
689
- os.write((char*)&seed_hash, sizeof(seed_hash));
343
+ os.write(reinterpret_cast<const char*>(&seed_hash), sizeof(seed_hash));
690
344
  if (!this->is_empty()) {
691
345
  if (!is_single_item) {
692
- const uint32_t num_keys = keys_.size();
693
- os.write((char*)&num_keys, sizeof(num_keys));
346
+ const uint32_t num_entries = entries_.size();
347
+ os.write(reinterpret_cast<const char*>(&num_entries), sizeof(num_entries));
694
348
  const uint32_t unused32 = 0;
695
- os.write((char*)&unused32, sizeof(unused32));
349
+ os.write(reinterpret_cast<const char*>(&unused32), sizeof(unused32));
696
350
  if (this->is_estimation_mode()) {
697
- os.write((char*)&(this->theta_), sizeof(uint64_t));
351
+ os.write(reinterpret_cast<const char*>(&(this->theta_)), sizeof(uint64_t));
698
352
  }
699
353
  }
700
- os.write((char*)keys_.data(), sizeof(uint64_t) * keys_.size());
354
+ os.write(reinterpret_cast<const char*>(entries_.data()), entries_.size() * sizeof(uint64_t));
701
355
  }
702
356
  }
703
357
 
704
358
  template<typename A>
705
- vector_u8<A> compact_theta_sketch_alloc<A>::serialize(unsigned header_size_bytes) const {
706
- const bool is_single_item = keys_.size() == 1 && !this->is_estimation_mode();
359
+ auto compact_theta_sketch_alloc<A>::serialize(unsigned header_size_bytes) const -> vector_bytes {
360
+ const bool is_single_item = entries_.size() == 1 && !this->is_estimation_mode();
707
361
  const uint8_t preamble_longs = this->is_empty() || is_single_item ? 1 : this->is_estimation_mode() ? 3 : 2;
708
- const size_t size = header_size_bytes + sizeof(uint64_t) * preamble_longs + sizeof(uint64_t) * keys_.size();
709
- vector_u8<A> bytes(size);
362
+ const size_t size = header_size_bytes + sizeof(uint64_t) * preamble_longs
363
+ + sizeof(uint64_t) * entries_.size();
364
+ vector_bytes bytes(size, 0, entries_.get_allocator());
710
365
  uint8_t* ptr = bytes.data() + header_size_bytes;
711
366
 
712
367
  ptr += copy_to_mem(&preamble_longs, ptr, sizeof(preamble_longs));
713
- const uint8_t serial_version = theta_sketch_alloc<A>::SERIAL_VERSION;
368
+ const uint8_t serial_version = SERIAL_VERSION;
714
369
  ptr += copy_to_mem(&serial_version, ptr, sizeof(serial_version));
715
370
  const uint8_t type = SKETCH_TYPE;
716
371
  ptr += copy_to_mem(&type, ptr, sizeof(type));
717
372
  const uint16_t unused16 = 0;
718
373
  ptr += copy_to_mem(&unused16, ptr, sizeof(unused16));
719
374
  const uint8_t flags_byte(
720
- (1 << theta_sketch_alloc<A>::flags::IS_COMPACT) |
721
- (1 << theta_sketch_alloc<A>::flags::IS_READ_ONLY) |
722
- (this->is_empty() ? 1 << theta_sketch_alloc<A>::flags::IS_EMPTY : 0) |
723
- (this->is_ordered() ? 1 << theta_sketch_alloc<A>::flags::IS_ORDERED : 0)
375
+ (1 << flags::IS_COMPACT) |
376
+ (1 << flags::IS_READ_ONLY) |
377
+ (this->is_empty() ? 1 << flags::IS_EMPTY : 0) |
378
+ (this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
724
379
  );
725
380
  ptr += copy_to_mem(&flags_byte, ptr, sizeof(flags_byte));
726
381
  const uint16_t seed_hash = get_seed_hash();
727
382
  ptr += copy_to_mem(&seed_hash, ptr, sizeof(seed_hash));
728
383
  if (!this->is_empty()) {
729
384
  if (!is_single_item) {
730
- const uint32_t num_keys = keys_.size();
731
- ptr += copy_to_mem(&num_keys, ptr, sizeof(num_keys));
385
+ const uint32_t num_entries = entries_.size();
386
+ ptr += copy_to_mem(&num_entries, ptr, sizeof(num_entries));
732
387
  const uint32_t unused32 = 0;
733
388
  ptr += copy_to_mem(&unused32, ptr, sizeof(unused32));
734
389
  if (this->is_estimation_mode()) {
735
- ptr += copy_to_mem(&(this->theta_), ptr, sizeof(uint64_t));
390
+ ptr += copy_to_mem(&theta_, ptr, sizeof(uint64_t));
736
391
  }
737
392
  }
738
- ptr += copy_to_mem(keys_.data(), ptr, sizeof(uint64_t) * keys_.size());
393
+ ptr += copy_to_mem(entries_.data(), ptr, entries_.size() * sizeof(uint64_t));
739
394
  }
740
-
741
395
  return bytes;
742
396
  }
743
397
 
744
398
  template<typename A>
745
- compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize(std::istream& is, uint64_t seed) {
399
+ compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize(std::istream& is, uint64_t seed, const A& allocator) {
746
400
  uint8_t preamble_longs;
747
- is.read((char*)&preamble_longs, sizeof(preamble_longs));
401
+ is.read(reinterpret_cast<char*>(&preamble_longs), sizeof(preamble_longs));
748
402
  uint8_t serial_version;
749
- is.read((char*)&serial_version, sizeof(serial_version));
403
+ is.read(reinterpret_cast<char*>(&serial_version), sizeof(serial_version));
750
404
  uint8_t type;
751
- is.read((char*)&type, sizeof(type));
405
+ is.read(reinterpret_cast<char*>(&type), sizeof(type));
752
406
  uint16_t unused16;
753
- is.read((char*)&unused16, sizeof(unused16));
407
+ is.read(reinterpret_cast<char*>(&unused16), sizeof(unused16));
754
408
  uint8_t flags_byte;
755
- is.read((char*)&flags_byte, sizeof(flags_byte));
409
+ is.read(reinterpret_cast<char*>(&flags_byte), sizeof(flags_byte));
756
410
  uint16_t seed_hash;
757
- is.read((char*)&seed_hash, sizeof(seed_hash));
758
- theta_sketch_alloc<A>::check_sketch_type(type, SKETCH_TYPE);
759
- theta_sketch_alloc<A>::check_serial_version(serial_version, theta_sketch_alloc<A>::SERIAL_VERSION);
760
- const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY);
761
- if (!is_empty) theta_sketch_alloc<A>::check_seed_hash(seed_hash, theta_sketch_alloc<A>::get_seed_hash(seed));
762
- return internal_deserialize(is, preamble_longs, flags_byte, seed_hash);
763
- }
764
-
765
- template<typename A>
766
- compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::internal_deserialize(std::istream& is, uint8_t preamble_longs, uint8_t flags_byte, uint16_t seed_hash) {
767
- uint64_t theta = theta_sketch_alloc<A>::MAX_THETA;
768
- uint32_t num_keys = 0;
769
-
770
- const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY);
411
+ is.read(reinterpret_cast<char*>(&seed_hash), sizeof(seed_hash));
412
+ checker<true>::check_sketch_type(type, SKETCH_TYPE);
413
+ checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
414
+ const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
415
+ if (!is_empty) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
416
+
417
+ uint64_t theta = theta_constants::MAX_THETA;
418
+ uint32_t num_entries = 0;
771
419
  if (!is_empty) {
772
420
  if (preamble_longs == 1) {
773
- num_keys = 1;
421
+ num_entries = 1;
774
422
  } else {
775
- is.read((char*)&num_keys, sizeof(num_keys));
423
+ is.read(reinterpret_cast<char*>(&num_entries), sizeof(num_entries));
776
424
  uint32_t unused32;
777
- is.read((char*)&unused32, sizeof(unused32));
425
+ is.read(reinterpret_cast<char*>(&unused32), sizeof(unused32));
778
426
  if (preamble_longs > 2) {
779
- is.read((char*)&theta, sizeof(theta));
427
+ is.read(reinterpret_cast<char*>(&theta), sizeof(theta));
780
428
  }
781
429
  }
782
430
  }
783
- vector_u64<A> keys(num_keys);
784
- if (!is_empty) is.read((char*)keys.data(), sizeof(uint64_t) * keys.size());
431
+ std::vector<uint64_t, A> entries(num_entries, 0, allocator);
432
+ if (!is_empty) is.read(reinterpret_cast<char*>(entries.data()), sizeof(uint64_t) * entries.size());
785
433
 
786
- const bool is_ordered = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_ORDERED);
787
- if (!is.good()) throw std::runtime_error("error reading from std::istream");
788
- return compact_theta_sketch_alloc<A>(is_empty, theta, std::move(keys), seed_hash, is_ordered);
434
+ const bool is_ordered = flags_byte & (1 << flags::IS_ORDERED);
435
+ if (!is.good()) throw std::runtime_error("error reading from std::istream");
436
+ return compact_theta_sketch_alloc(is_empty, is_ordered, seed_hash, theta, std::move(entries));
789
437
  }
790
438
 
791
439
  template<typename A>
792
- compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize(const void* bytes, size_t size, uint64_t seed) {
440
+ compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize(const void* bytes, size_t size, uint64_t seed, const A& allocator) {
793
441
  ensure_minimum_memory(size, 8);
794
442
  const char* ptr = static_cast<const char*>(bytes);
443
+ const char* base = ptr;
795
444
  uint8_t preamble_longs;
796
445
  ptr += copy_from_mem(ptr, &preamble_longs, sizeof(preamble_longs));
797
446
  uint8_t serial_version;
@@ -804,28 +453,19 @@ compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize(const v
804
453
  ptr += copy_from_mem(ptr, &flags_byte, sizeof(flags_byte));
805
454
  uint16_t seed_hash;
806
455
  ptr += copy_from_mem(ptr, &seed_hash, sizeof(seed_hash));
807
- theta_sketch_alloc<A>::check_sketch_type(type, SKETCH_TYPE);
808
- theta_sketch_alloc<A>::check_serial_version(serial_version, theta_sketch_alloc<A>::SERIAL_VERSION);
809
- const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY);
810
- if (!is_empty) theta_sketch_alloc<A>::check_seed_hash(seed_hash, theta_sketch_alloc<A>::get_seed_hash(seed));
811
- return internal_deserialize(ptr, size - (ptr - static_cast<const char*>(bytes)), preamble_longs, flags_byte, seed_hash);
812
- }
813
-
814
- template<typename A>
815
- compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::internal_deserialize(const void* bytes, size_t size, uint8_t preamble_longs, uint8_t flags_byte, uint16_t seed_hash) {
816
- const char* ptr = static_cast<const char*>(bytes);
817
- const char* base = ptr;
818
-
819
- uint64_t theta = theta_sketch_alloc<A>::MAX_THETA;
820
- uint32_t num_keys = 0;
456
+ checker<true>::check_sketch_type(type, SKETCH_TYPE);
457
+ checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
458
+ const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
459
+ if (!is_empty) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
821
460
 
822
- const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY);
461
+ uint64_t theta = theta_constants::MAX_THETA;
462
+ uint32_t num_entries = 0;
823
463
  if (!is_empty) {
824
464
  if (preamble_longs == 1) {
825
- num_keys = 1;
465
+ num_entries = 1;
826
466
  } else {
827
467
  ensure_minimum_memory(size, 8); // read the first prelong before this method
828
- ptr += copy_from_mem(ptr, &num_keys, sizeof(num_keys));
468
+ ptr += copy_from_mem(ptr, &num_entries, sizeof(num_entries));
829
469
  uint32_t unused32;
830
470
  ptr += copy_from_mem(ptr, &unused32, sizeof(unused32));
831
471
  if (preamble_longs > 2) {
@@ -834,106 +474,16 @@ compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::internal_deserializ
834
474
  }
835
475
  }
836
476
  }
837
- const size_t keys_size_bytes = sizeof(uint64_t) * num_keys;
838
- check_memory_size(ptr - base + keys_size_bytes, size);
839
- vector_u64<A> keys(num_keys);
840
- if (!is_empty) ptr += copy_from_mem(ptr, keys.data(), keys_size_bytes);
841
-
842
- const bool is_ordered = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_ORDERED);
843
- return compact_theta_sketch_alloc<A>(is_empty, theta, std::move(keys), seed_hash, is_ordered);
844
- }
845
-
846
- template<typename A>
847
- typename theta_sketch_alloc<A>::const_iterator compact_theta_sketch_alloc<A>::begin() const {
848
- return typename theta_sketch_alloc<A>::const_iterator(keys_.data(), keys_.size(), 0);
849
- }
850
-
851
- template<typename A>
852
- typename theta_sketch_alloc<A>::const_iterator compact_theta_sketch_alloc<A>::end() const {
853
- return typename theta_sketch_alloc<A>::const_iterator(keys_.data(), keys_.size(), keys_.size());
854
- }
855
-
856
- // builder
857
-
858
- template<typename A>
859
- update_theta_sketch_alloc<A>::builder::builder():
860
- lg_k_(DEFAULT_LG_K), rf_(DEFAULT_RESIZE_FACTOR), p_(1), seed_(DEFAULT_SEED) {}
861
-
862
- template<typename A>
863
- typename update_theta_sketch_alloc<A>::builder& update_theta_sketch_alloc<A>::builder::set_lg_k(uint8_t lg_k) {
864
- if (lg_k < MIN_LG_K) {
865
- throw std::invalid_argument("lg_k must not be less than " + std::to_string(MIN_LG_K) + ": " + std::to_string(lg_k));
866
- }
867
- lg_k_ = lg_k;
868
- return *this;
869
- }
870
-
871
- template<typename A>
872
- typename update_theta_sketch_alloc<A>::builder& update_theta_sketch_alloc<A>::builder::set_resize_factor(resize_factor rf) {
873
- rf_ = rf;
874
- return *this;
875
- }
477
+ const size_t entries_size_bytes = sizeof(uint64_t) * num_entries;
478
+ check_memory_size(ptr - base + entries_size_bytes, size);
479
+ std::vector<uint64_t, A> entries(num_entries, 0, allocator);
480
+ if (!is_empty) ptr += copy_from_mem(ptr, entries.data(), entries_size_bytes);
876
481
 
877
- template<typename A>
878
- typename update_theta_sketch_alloc<A>::builder& update_theta_sketch_alloc<A>::builder::set_p(float p) {
879
- p_ = p;
880
- return *this;
881
- }
882
-
883
- template<typename A>
884
- typename update_theta_sketch_alloc<A>::builder& update_theta_sketch_alloc<A>::builder::set_seed(uint64_t seed) {
885
- seed_ = seed;
886
- return *this;
887
- }
888
-
889
- template<typename A>
890
- uint8_t update_theta_sketch_alloc<A>::builder::starting_sub_multiple(uint8_t lg_tgt, uint8_t lg_min, uint8_t lg_rf) {
891
- return (lg_tgt <= lg_min) ? lg_min : (lg_rf == 0) ? lg_tgt : ((lg_tgt - lg_min) % lg_rf) + lg_min;
892
- }
893
-
894
- template<typename A>
895
- update_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::builder::build() const {
896
- return update_theta_sketch_alloc<A>(starting_sub_multiple(lg_k_ + 1, MIN_LG_K, static_cast<uint8_t>(rf_)), lg_k_, rf_, p_, seed_);
897
- }
898
-
899
- // iterator
900
-
901
- template<typename A>
902
- theta_sketch_alloc<A>::const_iterator::const_iterator(const uint64_t* keys, uint32_t size, uint32_t index):
903
- keys_(keys), size_(size), index_(index) {
904
- while (index_ < size_ && keys_[index_] == 0) ++index_;
905
- }
906
-
907
- template<typename A>
908
- typename theta_sketch_alloc<A>::const_iterator& theta_sketch_alloc<A>::const_iterator::operator++() {
909
- do {
910
- ++index_;
911
- } while (index_ < size_ && keys_[index_] == 0);
912
- return *this;
913
- }
914
-
915
- template<typename A>
916
- typename theta_sketch_alloc<A>::const_iterator theta_sketch_alloc<A>::const_iterator::operator++(int) {
917
- const_iterator tmp(*this);
918
- operator++();
919
- return tmp;
920
- }
921
-
922
- template<typename A>
923
- bool theta_sketch_alloc<A>::const_iterator::operator==(const const_iterator& other) const {
924
- return index_ == other.index_;
925
- }
926
-
927
- template<typename A>
928
- bool theta_sketch_alloc<A>::const_iterator::operator!=(const const_iterator& other) const {
929
- return index_ != other.index_;
930
- }
931
-
932
- template<typename A>
933
- uint64_t theta_sketch_alloc<A>::const_iterator::operator*() const {
934
- return keys_[index_];
482
+ const bool is_ordered = flags_byte & (1 << flags::IS_ORDERED);
483
+ return compact_theta_sketch_alloc(is_empty, is_ordered, seed_hash, theta, std::move(entries));
935
484
  }
936
485
 
937
486
  } /* namespace datasketches */
938
487
 
939
488
  #endif
489
+