datasketches 0.1.2 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (160) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/datasketches/cpc_wrapper.cpp +12 -13
  4. data/ext/datasketches/ext.cpp +1 -1
  5. data/ext/datasketches/ext.h +4 -0
  6. data/ext/datasketches/extconf.rb +1 -1
  7. data/ext/datasketches/fi_wrapper.cpp +6 -8
  8. data/ext/datasketches/hll_wrapper.cpp +13 -14
  9. data/ext/datasketches/kll_wrapper.cpp +28 -76
  10. data/ext/datasketches/theta_wrapper.cpp +27 -41
  11. data/ext/datasketches/vo_wrapper.cpp +4 -6
  12. data/lib/datasketches/version.rb +1 -1
  13. data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
  14. data/vendor/datasketches-cpp/README.md +4 -4
  15. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +7 -0
  16. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +12 -0
  17. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +24 -0
  18. data/vendor/datasketches-cpp/common/test/integration_test.cpp +77 -0
  19. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +9 -1
  20. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +3 -0
  21. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +2 -2
  22. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +28 -19
  23. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +8 -5
  24. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +19 -14
  25. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +2 -2
  26. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +6 -6
  27. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +0 -6
  28. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +3 -3
  29. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -3
  30. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +9 -9
  31. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -0
  32. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +237 -0
  33. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +15 -10
  34. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +40 -28
  35. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +19 -13
  36. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +140 -124
  37. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +15 -12
  38. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
  39. data/vendor/datasketches-cpp/hll/CMakeLists.txt +3 -0
  40. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +32 -57
  41. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +9 -8
  42. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +2 -2
  43. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +34 -48
  44. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +10 -10
  45. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +45 -77
  46. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +11 -12
  47. data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +2 -2
  48. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +2 -2
  49. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +15 -14
  50. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +1 -1
  51. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +10 -21
  52. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +2 -3
  53. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +10 -21
  54. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +2 -3
  55. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +28 -55
  56. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +8 -8
  57. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +9 -11
  58. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +2 -1
  59. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +34 -31
  60. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +3 -28
  61. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +1 -1
  62. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +1 -1
  63. data/vendor/datasketches-cpp/hll/include/hll.hpp +6 -34
  64. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +7 -7
  65. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +2 -2
  66. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +3 -3
  67. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +2 -2
  68. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +46 -50
  69. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +1 -1
  70. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +3 -3
  71. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +10 -3
  72. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +93 -75
  73. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +11 -10
  74. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +45 -42
  75. data/vendor/datasketches-cpp/python/CMakeLists.txt +2 -0
  76. data/vendor/datasketches-cpp/python/README.md +6 -3
  77. data/vendor/datasketches-cpp/python/src/datasketches.cpp +2 -0
  78. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -2
  79. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +3 -1
  80. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +246 -0
  81. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +36 -26
  82. data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -1
  83. data/vendor/datasketches-cpp/python/tests/kll_test.py +3 -3
  84. data/vendor/datasketches-cpp/python/tests/req_test.py +126 -0
  85. data/vendor/datasketches-cpp/python/tests/theta_test.py +28 -3
  86. data/vendor/datasketches-cpp/req/CMakeLists.txt +60 -0
  87. data/vendor/datasketches-cpp/{tuple/include/theta_a_not_b_experimental_impl.hpp → req/include/req_common.hpp} +17 -8
  88. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +137 -0
  89. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +501 -0
  90. data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +69 -0
  91. data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +60 -0
  92. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +395 -0
  93. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +810 -0
  94. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +43 -0
  95. data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
  96. data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
  97. data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
  98. data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
  99. data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
  100. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +128 -0
  101. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +494 -0
  102. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +10 -9
  103. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +82 -70
  104. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +5 -5
  105. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +7 -7
  106. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -0
  107. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +96 -0
  108. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -31
  109. data/vendor/datasketches-cpp/setup.py +5 -3
  110. data/vendor/datasketches-cpp/theta/CMakeLists.txt +30 -3
  111. data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_sampled_sets.hpp +2 -1
  112. data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_theta_sketched_sets.hpp +1 -1
  113. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +12 -29
  114. data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +5 -46
  115. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_comparators.hpp +0 -0
  116. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_constants.hpp +2 -0
  117. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_helpers.hpp +0 -0
  118. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +22 -29
  119. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base.hpp +0 -0
  120. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base_impl.hpp +0 -0
  121. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +8 -90
  122. data/vendor/datasketches-cpp/{tuple/test/theta_union_experimental_test.cpp → theta/include/theta_jaccard_similarity.hpp} +11 -18
  123. data/vendor/datasketches-cpp/{tuple/include/jaccard_similarity.hpp → theta/include/theta_jaccard_similarity_base.hpp} +6 -22
  124. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base.hpp +0 -0
  125. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base_impl.hpp +5 -0
  126. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +132 -266
  127. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +200 -650
  128. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +27 -60
  129. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base.hpp +1 -1
  130. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base_impl.hpp +5 -0
  131. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +13 -69
  132. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base.hpp +3 -19
  133. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base_impl.hpp +6 -1
  134. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  135. data/vendor/datasketches-cpp/{tuple → theta}/test/theta_jaccard_similarity_test.cpp +2 -3
  136. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +37 -234
  137. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +3 -35
  138. data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +38 -0
  139. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +28 -13
  140. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +6 -6
  141. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +1 -6
  142. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -4
  143. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -4
  144. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +2 -1
  145. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +2 -2
  146. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +1 -4
  147. metadata +43 -34
  148. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +0 -53
  149. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +0 -78
  150. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +0 -43
  151. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +0 -393
  152. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +0 -481
  153. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +0 -88
  154. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +0 -47
  155. data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +0 -250
  156. data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
  157. data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
  158. data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
  159. data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +0 -224
  160. data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +0 -247
@@ -20,35 +20,23 @@
20
20
  #ifndef THETA_SKETCH_IMPL_HPP_
21
21
  #define THETA_SKETCH_IMPL_HPP_
22
22
 
23
- #include <algorithm>
24
- #include <cmath>
25
- #include <memory>
26
- #include <functional>
27
- #include <istream>
28
- #include <ostream>
29
23
  #include <sstream>
24
+ #include <vector>
30
25
 
31
- #include "MurmurHash3.h"
32
26
  #include "serde.hpp"
33
27
  #include "binomial_bounds.hpp"
34
- #include "memory_operations.hpp"
28
+ #include "theta_helpers.hpp"
35
29
 
36
30
  namespace datasketches {
37
31
 
38
- /*
39
- * author Alexander Saydakov
40
- * author Lee Rhodes
41
- * author Kevin Lang
42
- */
43
-
44
32
  template<typename A>
45
- theta_sketch_alloc<A>::theta_sketch_alloc(bool is_empty, uint64_t theta):
46
- is_empty_(is_empty), theta_(theta)
47
- {}
33
+ bool theta_sketch_alloc<A>::is_estimation_mode() const {
34
+ return get_theta64() < theta_constants::MAX_THETA && !is_empty();
35
+ }
48
36
 
49
37
  template<typename A>
50
- bool theta_sketch_alloc<A>::is_empty() const {
51
- return is_empty_;
38
+ double theta_sketch_alloc<A>::get_theta() const {
39
+ return static_cast<double>(get_theta64()) / theta_constants::MAX_THETA;
52
40
  }
53
41
 
54
42
  template<typename A>
@@ -69,182 +57,47 @@ double theta_sketch_alloc<A>::get_upper_bound(uint8_t num_std_devs) const {
69
57
  }
70
58
 
71
59
  template<typename A>
72
- bool theta_sketch_alloc<A>::is_estimation_mode() const {
73
- return theta_ < MAX_THETA && !is_empty_;
74
- }
75
-
76
- template<typename A>
77
- double theta_sketch_alloc<A>::get_theta() const {
78
- return (double) theta_ / MAX_THETA;
79
- }
80
-
81
- template<typename A>
82
- uint64_t theta_sketch_alloc<A>::get_theta64() const {
83
- return theta_;
84
- }
85
-
86
- template<typename A>
87
- typename theta_sketch_alloc<A>::unique_ptr theta_sketch_alloc<A>::deserialize(std::istream& is, uint64_t seed) {
88
- uint8_t preamble_longs;
89
- is.read((char*)&preamble_longs, sizeof(preamble_longs));
90
- uint8_t serial_version;
91
- is.read((char*)&serial_version, sizeof(serial_version));
92
- uint8_t type;
93
- is.read((char*)&type, sizeof(type));
94
- uint8_t lg_nom_size;
95
- is.read((char*)&lg_nom_size, sizeof(lg_nom_size));
96
- uint8_t lg_cur_size;
97
- is.read((char*)&lg_cur_size, sizeof(lg_cur_size));
98
- uint8_t flags_byte;
99
- is.read((char*)&flags_byte, sizeof(flags_byte));
100
- uint16_t seed_hash;
101
- is.read((char*)&seed_hash, sizeof(seed_hash));
102
-
103
- check_serial_version(serial_version, SERIAL_VERSION);
104
-
105
- if (type == update_theta_sketch_alloc<A>::SKETCH_TYPE) {
106
- check_seed_hash(seed_hash, get_seed_hash(seed));
107
- typename update_theta_sketch_alloc<A>::resize_factor rf = static_cast<typename update_theta_sketch_alloc<A>::resize_factor>(preamble_longs >> 6);
108
- typedef typename std::allocator_traits<A>::template rebind_alloc<update_theta_sketch_alloc<A>> AU;
109
- return unique_ptr(
110
- static_cast<theta_sketch_alloc<A>*>(new (AU().allocate(1)) update_theta_sketch_alloc<A>(update_theta_sketch_alloc<A>::internal_deserialize(is, rf, lg_cur_size, lg_nom_size, flags_byte, seed))),
111
- [](theta_sketch_alloc<A>* ptr) {
112
- ptr->~theta_sketch_alloc();
113
- AU().deallocate(static_cast<update_theta_sketch_alloc<A>*>(ptr), 1);
114
- }
115
- );
116
- } else if (type == compact_theta_sketch_alloc<A>::SKETCH_TYPE) {
117
- const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY);
118
- if (!is_empty) check_seed_hash(seed_hash, get_seed_hash(seed));
119
- typedef typename std::allocator_traits<A>::template rebind_alloc<compact_theta_sketch_alloc<A>> AC;
120
- return unique_ptr(
121
- static_cast<theta_sketch_alloc<A>*>(new (AC().allocate(1)) compact_theta_sketch_alloc<A>(compact_theta_sketch_alloc<A>::internal_deserialize(is, preamble_longs, flags_byte, seed_hash))),
122
- [](theta_sketch_alloc<A>* ptr) {
123
- ptr->~theta_sketch_alloc();
124
- AC().deallocate(static_cast<compact_theta_sketch_alloc<A>*>(ptr), 1);
125
- }
126
- );
127
- }
128
- throw std::invalid_argument("unsupported sketch type " + std::to_string((int) type));
129
- }
130
-
131
- template<typename A>
132
- typename theta_sketch_alloc<A>::unique_ptr theta_sketch_alloc<A>::deserialize(const void* bytes, size_t size, uint64_t seed) {
133
- ensure_minimum_memory(size, static_cast<size_t>(8));
134
- const char* ptr = static_cast<const char*>(bytes);
135
- uint8_t preamble_longs;
136
- ptr += copy_from_mem(ptr, &preamble_longs, sizeof(preamble_longs));
137
- uint8_t serial_version;
138
- ptr += copy_from_mem(ptr, &serial_version, sizeof(serial_version));
139
- uint8_t type;
140
- ptr += copy_from_mem(ptr, &type, sizeof(type));
141
- uint8_t lg_nom_size;
142
- ptr += copy_from_mem(ptr, &lg_nom_size, sizeof(lg_nom_size));
143
- uint8_t lg_cur_size;
144
- ptr += copy_from_mem(ptr, &lg_cur_size, sizeof(lg_cur_size));
145
- uint8_t flags_byte;
146
- ptr += copy_from_mem(ptr, &flags_byte, sizeof(flags_byte));
147
- uint16_t seed_hash;
148
- ptr += copy_from_mem(ptr, &seed_hash, sizeof(seed_hash));
149
-
150
- check_serial_version(serial_version, SERIAL_VERSION);
151
-
152
- if (type == update_theta_sketch_alloc<A>::SKETCH_TYPE) {
153
- check_seed_hash(seed_hash, get_seed_hash(seed));
154
- typename update_theta_sketch_alloc<A>::resize_factor rf = static_cast<typename update_theta_sketch_alloc<A>::resize_factor>(preamble_longs >> 6);
155
- typedef typename std::allocator_traits<A>::template rebind_alloc<update_theta_sketch_alloc<A>> AU;
156
- return unique_ptr(
157
- static_cast<theta_sketch_alloc<A>*>(new (AU().allocate(1)) update_theta_sketch_alloc<A>(
158
- update_theta_sketch_alloc<A>::internal_deserialize(ptr, size - (ptr - static_cast<const char*>(bytes)), rf, lg_cur_size, lg_nom_size, flags_byte, seed))
159
- ),
160
- [](theta_sketch_alloc<A>* ptr) {
161
- ptr->~theta_sketch_alloc();
162
- AU().deallocate(static_cast<update_theta_sketch_alloc<A>*>(ptr), 1);
163
- }
164
- );
165
- } else if (type == compact_theta_sketch_alloc<A>::SKETCH_TYPE) {
166
- const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY);
167
- if (!is_empty) check_seed_hash(seed_hash, get_seed_hash(seed));
168
- typedef typename std::allocator_traits<A>::template rebind_alloc<compact_theta_sketch_alloc<A>> AC;
169
- return unique_ptr(
170
- static_cast<theta_sketch_alloc<A>*>(new (AC().allocate(1)) compact_theta_sketch_alloc<A>(
171
- compact_theta_sketch_alloc<A>::internal_deserialize(ptr, size - (ptr - static_cast<const char*>(bytes)), preamble_longs, flags_byte, seed_hash))
172
- ),
173
- [](theta_sketch_alloc<A>* ptr) {
174
- ptr->~theta_sketch_alloc();
175
- AC().deallocate(static_cast<compact_theta_sketch_alloc<A>*>(ptr), 1);
176
- }
177
- );
178
- }
179
- throw std::invalid_argument("unsupported sketch type " + std::to_string((int) type));
180
- }
181
-
182
- template<typename A>
183
- uint16_t theta_sketch_alloc<A>::get_seed_hash(uint64_t seed) {
184
- HashState hashes;
185
- MurmurHash3_x64_128(&seed, sizeof(seed), 0, hashes);
186
- return hashes.h1;
187
- }
188
-
189
- template<typename A>
190
- void theta_sketch_alloc<A>::check_sketch_type(uint8_t actual, uint8_t expected) {
191
- if (actual != expected) {
192
- throw std::invalid_argument("Sketch type mismatch: expected " + std::to_string((int)expected) + ", actual " + std::to_string((int)actual));
193
- }
194
- }
195
-
196
- template<typename A>
197
- void theta_sketch_alloc<A>::check_serial_version(uint8_t actual, uint8_t expected) {
198
- if (actual != expected) {
199
- throw std::invalid_argument("Sketch serial version mismatch: expected " + std::to_string((int)expected) + ", actual " + std::to_string((int)actual));
200
- }
201
- }
202
-
203
- template<typename A>
204
- void theta_sketch_alloc<A>::check_seed_hash(uint16_t actual, uint16_t expected) {
205
- if (actual != expected) {
206
- throw std::invalid_argument("Sketch seed hash mismatch: expected " + std::to_string(expected) + ", actual " + std::to_string(actual));
60
+ string<A> theta_sketch_alloc<A>::to_string(bool detail) const {
61
+ ostrstream os;
62
+ os << "### Theta sketch summary:" << std::endl;
63
+ os << " num retained entries : " << get_num_retained() << std::endl;
64
+ os << " seed hash : " << get_seed_hash() << std::endl;
65
+ os << " empty? : " << (is_empty() ? "true" : "false") << std::endl;
66
+ os << " ordered? : " << (is_ordered() ? "true" : "false") << std::endl;
67
+ os << " estimation mode? : " << (is_estimation_mode() ? "true" : "false") << std::endl;
68
+ os << " theta (fraction) : " << get_theta() << std::endl;
69
+ os << " theta (raw 64-bit) : " << get_theta64() << std::endl;
70
+ os << " estimate : " << this->get_estimate() << std::endl;
71
+ os << " lower bound 95% conf : " << this->get_lower_bound(2) << std::endl;
72
+ os << " upper bound 95% conf : " << this->get_upper_bound(2) << std::endl;
73
+ print_specifics(os);
74
+ os << "### End sketch summary" << std::endl;
75
+ if (detail) {
76
+ os << "### Retained entries" << std::endl;
77
+ for (const auto& hash: *this) {
78
+ os << hash << std::endl;
79
+ }
80
+ os << "### End retained entries" << std::endl;
207
81
  }
82
+ return os.str();
208
83
  }
209
84
 
210
85
  // update sketch
211
86
 
212
87
  template<typename A>
213
- update_theta_sketch_alloc<A>::update_theta_sketch_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed):
214
- theta_sketch_alloc<A>(true, theta_sketch_alloc<A>::MAX_THETA),
215
- lg_cur_size_(lg_cur_size),
216
- lg_nom_size_(lg_nom_size),
217
- keys_(1 << lg_cur_size_, 0),
218
- num_keys_(0),
219
- rf_(rf),
220
- p_(p),
221
- seed_(seed),
222
- capacity_(get_capacity(lg_cur_size, lg_nom_size))
223
- {
224
- if (p < 1) this->theta_ *= p;
225
- }
226
-
227
- template<typename A>
228
- update_theta_sketch_alloc<A>::update_theta_sketch_alloc(bool is_empty, uint64_t theta, uint8_t lg_cur_size, uint8_t lg_nom_size, vector_u64<A>&& keys, uint32_t num_keys, resize_factor rf, float p, uint64_t seed):
229
- theta_sketch_alloc<A>(is_empty, theta),
230
- lg_cur_size_(lg_cur_size),
231
- lg_nom_size_(lg_nom_size),
232
- keys_(std::move(keys)),
233
- num_keys_(num_keys),
234
- rf_(rf),
235
- p_(p),
236
- seed_(seed),
237
- capacity_(get_capacity(lg_cur_size, lg_nom_size))
88
+ update_theta_sketch_alloc<A>::update_theta_sketch_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf,
89
+ uint64_t theta, uint64_t seed, const A& allocator):
90
+ table_(lg_cur_size, lg_nom_size, rf, theta, seed, allocator)
238
91
  {}
239
92
 
240
93
  template<typename A>
241
- uint32_t update_theta_sketch_alloc<A>::get_num_retained() const {
242
- return num_keys_;
94
+ A update_theta_sketch_alloc<A>::get_allocator() const {
95
+ return table_.allocator_;
243
96
  }
244
97
 
245
98
  template<typename A>
246
- uint16_t update_theta_sketch_alloc<A>::get_seed_hash() const {
247
- return theta_sketch_alloc<A>::get_seed_hash(seed_);
99
+ bool update_theta_sketch_alloc<A>::is_empty() const {
100
+ return table_.is_empty_;
248
101
  }
249
102
 
250
103
  template<typename A>
@@ -253,169 +106,28 @@ bool update_theta_sketch_alloc<A>::is_ordered() const {
253
106
  }
254
107
 
255
108
  template<typename A>
256
- string<A> update_theta_sketch_alloc<A>::to_string(bool print_items) const {
257
- std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
258
- os << "### Update Theta sketch summary:" << std::endl;
259
- os << " lg nominal size : " << (int) lg_nom_size_ << std::endl;
260
- os << " lg current size : " << (int) lg_cur_size_ << std::endl;
261
- os << " num retained keys : " << num_keys_ << std::endl;
262
- os << " resize factor : " << (1 << rf_) << std::endl;
263
- os << " sampling probability : " << p_ << std::endl;
264
- os << " seed hash : " << this->get_seed_hash() << std::endl;
265
- os << " empty? : " << (this->is_empty() ? "true" : "false") << std::endl;
266
- os << " ordered? : " << (this->is_ordered() ? "true" : "false") << std::endl;
267
- os << " estimation mode? : " << (this->is_estimation_mode() ? "true" : "false") << std::endl;
268
- os << " theta (fraction) : " << this->get_theta() << std::endl;
269
- os << " theta (raw 64-bit) : " << this->theta_ << std::endl;
270
- os << " estimate : " << this->get_estimate() << std::endl;
271
- os << " lower bound 95% conf : " << this->get_lower_bound(2) << std::endl;
272
- os << " upper bound 95% conf : " << this->get_upper_bound(2) << std::endl;
273
- os << "### End sketch summary" << std::endl;
274
- if (print_items) {
275
- os << "### Retained keys" << std::endl;
276
- for (auto key: *this) os << " " << key << std::endl;
277
- os << "### End retained keys" << std::endl;
278
- }
279
- return os.str();
280
- }
281
-
282
- template<typename A>
283
- void update_theta_sketch_alloc<A>::serialize(std::ostream& os) const {
284
- const uint8_t preamble_longs_and_rf = 3 | (rf_ << 6);
285
- os.write((char*)&preamble_longs_and_rf, sizeof(preamble_longs_and_rf));
286
- const uint8_t serial_version = theta_sketch_alloc<A>::SERIAL_VERSION;
287
- os.write((char*)&serial_version, sizeof(serial_version));
288
- const uint8_t type = SKETCH_TYPE;
289
- os.write((char*)&type, sizeof(type));
290
- os.write((char*)&lg_nom_size_, sizeof(lg_nom_size_));
291
- os.write((char*)&lg_cur_size_, sizeof(lg_cur_size_));
292
- const uint8_t flags_byte(
293
- (this->is_empty() ? 1 << theta_sketch_alloc<A>::flags::IS_EMPTY : 0)
294
- );
295
- os.write((char*)&flags_byte, sizeof(flags_byte));
296
- const uint16_t seed_hash = get_seed_hash();
297
- os.write((char*)&seed_hash, sizeof(seed_hash));
298
- os.write((char*)&num_keys_, sizeof(num_keys_));
299
- os.write((char*)&p_, sizeof(p_));
300
- os.write((char*)&(this->theta_), sizeof(uint64_t));
301
- os.write((char*)keys_.data(), sizeof(uint64_t) * keys_.size());
302
- }
303
-
304
- template<typename A>
305
- vector_u8<A> update_theta_sketch_alloc<A>::serialize(unsigned header_size_bytes) const {
306
- const uint8_t preamble_longs = 3;
307
- const size_t size = header_size_bytes + sizeof(uint64_t) * preamble_longs + sizeof(uint64_t) * keys_.size();
308
- vector_u8<A> bytes(size);
309
- uint8_t* ptr = bytes.data() + header_size_bytes;
310
-
311
- const uint8_t preamble_longs_and_rf = preamble_longs | (rf_ << 6);
312
- ptr += copy_to_mem(&preamble_longs_and_rf, ptr, sizeof(preamble_longs_and_rf));
313
- const uint8_t serial_version = theta_sketch_alloc<A>::SERIAL_VERSION;
314
- ptr += copy_to_mem(&serial_version, ptr, sizeof(serial_version));
315
- const uint8_t type = SKETCH_TYPE;
316
- ptr += copy_to_mem(&type, ptr, sizeof(type));
317
- ptr += copy_to_mem(&lg_nom_size_, ptr, sizeof(lg_nom_size_));
318
- ptr += copy_to_mem(&lg_cur_size_, ptr, sizeof(lg_cur_size_));
319
- const uint8_t flags_byte(
320
- (this->is_empty() ? 1 << theta_sketch_alloc<A>::flags::IS_EMPTY : 0)
321
- );
322
- ptr += copy_to_mem(&flags_byte, ptr, sizeof(flags_byte));
323
- const uint16_t seed_hash = get_seed_hash();
324
- ptr += copy_to_mem(&seed_hash, ptr, sizeof(seed_hash));
325
- ptr += copy_to_mem(&num_keys_, ptr, sizeof(num_keys_));
326
- ptr += copy_to_mem(&p_, ptr, sizeof(p_));
327
- ptr += copy_to_mem(&(this->theta_), ptr, sizeof(uint64_t));
328
- ptr += copy_to_mem(keys_.data(), ptr, sizeof(uint64_t) * keys_.size());
329
-
330
- return bytes;
331
- }
332
-
333
- template<typename A>
334
- update_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::deserialize(std::istream& is, uint64_t seed) {
335
- uint8_t preamble_longs;
336
- is.read((char*)&preamble_longs, sizeof(preamble_longs));
337
- resize_factor rf = static_cast<resize_factor>(preamble_longs >> 6);
338
- preamble_longs &= 0x3f; // remove resize factor
339
- uint8_t serial_version;
340
- is.read((char*)&serial_version, sizeof(serial_version));
341
- uint8_t type;
342
- is.read((char*)&type, sizeof(type));
343
- uint8_t lg_nom_size;
344
- is.read((char*)&lg_nom_size, sizeof(lg_nom_size));
345
- uint8_t lg_cur_size;
346
- is.read((char*)&lg_cur_size, sizeof(lg_cur_size));
347
- uint8_t flags_byte;
348
- is.read((char*)&flags_byte, sizeof(flags_byte));
349
- uint16_t seed_hash;
350
- is.read((char*)&seed_hash, sizeof(seed_hash));
351
- theta_sketch_alloc<A>::check_sketch_type(type, SKETCH_TYPE);
352
- theta_sketch_alloc<A>::check_serial_version(serial_version, theta_sketch_alloc<A>::SERIAL_VERSION);
353
- theta_sketch_alloc<A>::check_seed_hash(seed_hash, theta_sketch_alloc<A>::get_seed_hash(seed));
354
- return internal_deserialize(is, rf, lg_cur_size, lg_nom_size, flags_byte, seed);
109
+ uint64_t update_theta_sketch_alloc<A>::get_theta64() const {
110
+ return table_.theta_;
355
111
  }
356
112
 
357
113
  template<typename A>
358
- update_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::internal_deserialize(std::istream& is, resize_factor rf, uint8_t lg_cur_size, uint8_t lg_nom_size, uint8_t flags_byte, uint64_t seed) {
359
- uint32_t num_keys;
360
- is.read((char*)&num_keys, sizeof(num_keys));
361
- float p;
362
- is.read((char*)&p, sizeof(p));
363
- uint64_t theta;
364
- is.read((char*)&theta, sizeof(theta));
365
- vector_u64<A> keys(1 << lg_cur_size);
366
- is.read((char*)keys.data(), sizeof(uint64_t) * keys.size());
367
- const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY);
368
- if (!is.good()) throw std::runtime_error("error reading from std::istream");
369
- return update_theta_sketch_alloc<A>(is_empty, theta, lg_cur_size, lg_nom_size, std::move(keys), num_keys, rf, p, seed);
114
+ uint32_t update_theta_sketch_alloc<A>::get_num_retained() const {
115
+ return table_.num_entries_;
370
116
  }
371
117
 
372
118
  template<typename A>
373
- update_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::deserialize(const void* bytes, size_t size, uint64_t seed) {
374
- ensure_minimum_memory(size, 8);
375
- const char* ptr = static_cast<const char*>(bytes);
376
- uint8_t preamble_longs;
377
- ptr += copy_from_mem(ptr, &preamble_longs, sizeof(preamble_longs));
378
- resize_factor rf = static_cast<resize_factor>(preamble_longs >> 6);
379
- preamble_longs &= 0x3f; // remove resize factor
380
- uint8_t serial_version;
381
- ptr += copy_from_mem(ptr, &serial_version, sizeof(serial_version));
382
- uint8_t type;
383
- ptr += copy_from_mem(ptr, &type, sizeof(type));
384
- uint8_t lg_nom_size;
385
- ptr += copy_from_mem(ptr, &lg_nom_size, sizeof(lg_nom_size));
386
- uint8_t lg_cur_size;
387
- ptr += copy_from_mem(ptr, &lg_cur_size, sizeof(lg_cur_size));
388
- uint8_t flags_byte;
389
- ptr += copy_from_mem(ptr, &flags_byte, sizeof(flags_byte));
390
- uint16_t seed_hash;
391
- ptr += copy_from_mem(ptr, &seed_hash, sizeof(seed_hash));
392
- theta_sketch_alloc<A>::check_sketch_type(type, SKETCH_TYPE);
393
- theta_sketch_alloc<A>::check_serial_version(serial_version, theta_sketch_alloc<A>::SERIAL_VERSION);
394
- theta_sketch_alloc<A>::check_seed_hash(seed_hash, theta_sketch_alloc<A>::get_seed_hash(seed));
395
- return internal_deserialize(ptr, size - (ptr - static_cast<const char*>(bytes)), rf, lg_cur_size, lg_nom_size, flags_byte, seed);
119
+ uint16_t update_theta_sketch_alloc<A>::get_seed_hash() const {
120
+ return compute_seed_hash(table_.seed_);
396
121
  }
397
122
 
398
123
  template<typename A>
399
- update_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::internal_deserialize(const void* bytes, size_t size, resize_factor rf, uint8_t lg_cur_size, uint8_t lg_nom_size, uint8_t flags_byte, uint64_t seed) {
400
- const uint32_t table_size = 1 << lg_cur_size;
401
- ensure_minimum_memory(size, 16 + sizeof(uint64_t) * table_size);
402
- const char* ptr = static_cast<const char*>(bytes);
403
- uint32_t num_keys;
404
- ptr += copy_from_mem(ptr, &num_keys, sizeof(num_keys));
405
- float p;
406
- ptr += copy_from_mem(ptr, &p, sizeof(p));
407
- uint64_t theta;
408
- ptr += copy_from_mem(ptr, &theta, sizeof(theta));
409
- vector_u64<A> keys(table_size);
410
- ptr += copy_from_mem(ptr, keys.data(), sizeof(uint64_t) * table_size);
411
- const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY);
412
- return update_theta_sketch_alloc<A>(is_empty, theta, lg_cur_size, lg_nom_size, std::move(keys), num_keys, rf, p, seed);
124
+ uint8_t update_theta_sketch_alloc<A>::get_lg_k() const {
125
+ return table_.lg_nom_size_;
413
126
  }
414
127
 
415
128
  template<typename A>
416
- void update_theta_sketch_alloc<A>::update(const std::string& value) {
417
- if (value.empty()) return;
418
- update(value.c_str(), value.length());
129
+ auto update_theta_sketch_alloc<A>::get_rf() const -> resize_factor {
130
+ return table_.rf_;
419
131
  }
420
132
 
421
133
  template<typename A>
@@ -460,19 +172,7 @@ void update_theta_sketch_alloc<A>::update(int8_t value) {
460
172
 
461
173
  template<typename A>
462
174
  void update_theta_sketch_alloc<A>::update(double value) {
463
- union {
464
- int64_t long_value;
465
- double double_value;
466
- } long_double_union;
467
-
468
- if (value == 0.0) {
469
- long_double_union.double_value = 0.0; // canonicalize -0.0 to 0.0
470
- } else if (std::isnan(value)) {
471
- long_double_union.long_value = 0x7ff8000000000000L; // canonicalize NaN using value from Java's Double.doubleToLongBits()
472
- } else {
473
- long_double_union.double_value = value;
474
- }
475
- update(&long_double_union, sizeof(long_double_union));
175
+ update(canonical_double(value));
476
176
  }
477
177
 
478
178
  template<typename A>
@@ -481,157 +181,116 @@ void update_theta_sketch_alloc<A>::update(float value) {
481
181
  }
482
182
 
483
183
  template<typename A>
484
- void update_theta_sketch_alloc<A>::update(const void* data, unsigned length) {
485
- HashState hashes;
486
- MurmurHash3_x64_128(data, length, seed_, hashes);
487
- const uint64_t hash = hashes.h1 >> 1; // Java implementation does logical shift >>> to make values positive
488
- internal_update(hash);
489
- }
490
-
491
- template<typename A>
492
- compact_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::compact(bool ordered) const {
493
- return compact_theta_sketch_alloc<A>(*this, ordered);
184
+ void update_theta_sketch_alloc<A>::update(const std::string& value) {
185
+ if (value.empty()) return;
186
+ update(value.c_str(), value.length());
494
187
  }
495
188
 
496
189
  template<typename A>
497
- void update_theta_sketch_alloc<A>::internal_update(uint64_t hash) {
498
- this->is_empty_ = false;
499
- if (hash >= this->theta_ || hash == 0) return; // hash == 0 is reserved to mark empty slots in the table
500
- if (hash_search_or_insert(hash, keys_.data(), lg_cur_size_)) {
501
- num_keys_++;
502
- if (num_keys_ > capacity_) {
503
- if (lg_cur_size_ <= lg_nom_size_) {
504
- resize();
505
- } else {
506
- rebuild();
507
- }
508
- }
190
+ void update_theta_sketch_alloc<A>::update(const void* data, size_t length) {
191
+ const uint64_t hash = table_.hash_and_screen(data, length);
192
+ if (hash == 0) return;
193
+ auto result = table_.find(hash);
194
+ if (!result.second) {
195
+ table_.insert(result.first, hash);
509
196
  }
510
197
  }
511
198
 
512
199
  template<typename A>
513
200
  void update_theta_sketch_alloc<A>::trim() {
514
- if (num_keys_ > static_cast<uint32_t>(1 << lg_nom_size_)) rebuild();
201
+ table_.trim();
515
202
  }
516
203
 
517
204
  template<typename A>
518
- void update_theta_sketch_alloc<A>::resize() {
519
- const uint8_t lg_tgt_size = lg_nom_size_ + 1;
520
- const uint8_t factor = std::max(1, std::min(static_cast<int>(rf_), lg_tgt_size - lg_cur_size_));
521
- const uint8_t lg_new_size = lg_cur_size_ + factor;
522
- const uint32_t new_size = 1 << lg_new_size;
523
- vector_u64<A> new_keys(new_size, 0);
524
- for (uint32_t i = 0; i < keys_.size(); i++) {
525
- if (keys_[i] != 0) {
526
- hash_search_or_insert(keys_[i], new_keys.data(), lg_new_size); // TODO hash_insert
527
- }
528
- }
529
- keys_ = std::move(new_keys);
530
- lg_cur_size_ += factor;
531
- capacity_ = get_capacity(lg_cur_size_, lg_nom_size_);
532
- }
533
-
534
- template<typename A>
535
- void update_theta_sketch_alloc<A>::rebuild() {
536
- const uint32_t pivot = (1 << lg_nom_size_) + keys_.size() - num_keys_;
537
- std::nth_element(keys_.begin(), keys_.begin() + pivot, keys_.end());
538
- this->theta_ = keys_[pivot];
539
- vector_u64<A> new_keys(keys_.size(), 0);
540
- num_keys_ = 0;
541
- for (uint32_t i = 0; i < keys_.size(); i++) {
542
- if (keys_[i] != 0 && keys_[i] < this->theta_) {
543
- hash_search_or_insert(keys_[i], new_keys.data(), lg_cur_size_); // TODO hash_insert
544
- num_keys_++;
545
- }
546
- }
547
- keys_ = std::move(new_keys);
205
+ auto update_theta_sketch_alloc<A>::begin() -> iterator {
206
+ return iterator(table_.entries_, 1 << table_.lg_cur_size_, 0);
548
207
  }
549
208
 
550
209
  template<typename A>
551
- uint32_t update_theta_sketch_alloc<A>::get_capacity(uint8_t lg_cur_size, uint8_t lg_nom_size) {
552
- const double fraction = (lg_cur_size <= lg_nom_size) ? RESIZE_THRESHOLD : REBUILD_THRESHOLD;
553
- return std::floor(fraction * (1 << lg_cur_size));
210
+ auto update_theta_sketch_alloc<A>::end() -> iterator {
211
+ return iterator(nullptr, 0, 1 << table_.lg_cur_size_);
554
212
  }
555
213
 
556
214
  template<typename A>
557
- uint32_t update_theta_sketch_alloc<A>::get_stride(uint64_t hash, uint8_t lg_size) {
558
- // odd and independent of index assuming lg_size lowest bits of the hash were used for the index
559
- return (2 * static_cast<uint32_t>((hash >> lg_size) & STRIDE_MASK)) + 1;
215
+ auto update_theta_sketch_alloc<A>::begin() const -> const_iterator {
216
+ return const_iterator(table_.entries_, 1 << table_.lg_cur_size_, 0);
560
217
  }
561
218
 
562
219
  template<typename A>
563
- bool update_theta_sketch_alloc<A>::hash_search_or_insert(uint64_t hash, uint64_t* table, uint8_t lg_size) {
564
- const uint32_t mask = (1 << lg_size) - 1;
565
- const uint32_t stride = get_stride(hash, lg_size);
566
- uint32_t cur_probe = static_cast<uint32_t>(hash) & mask;
220
+ auto update_theta_sketch_alloc<A>::end() const -> const_iterator {
221
+ return const_iterator(nullptr, 0, 1 << table_.lg_cur_size_);
222
+ }
567
223
 
568
- // search for duplicate or zero
569
- const uint32_t loop_index = cur_probe;
570
- do {
571
- const uint64_t value = table[cur_probe];
572
- if (value == 0) {
573
- table[cur_probe] = hash; // insert value
574
- return true;
575
- } else if (value == hash) {
576
- return false; // found a duplicate
577
- }
578
- cur_probe = (cur_probe + stride) & mask;
579
- } while (cur_probe != loop_index);
580
- throw std::logic_error("key not found and no empty slots!");
581
- }
582
-
583
- template<typename A>
584
- bool update_theta_sketch_alloc<A>::hash_search(uint64_t hash, const uint64_t* table, uint8_t lg_size) {
585
- const uint32_t mask = (1 << lg_size) - 1;
586
- const uint32_t stride = update_theta_sketch_alloc<A>::get_stride(hash, lg_size);
587
- uint32_t cur_probe = static_cast<uint32_t>(hash) & mask;
588
- const uint32_t loop_index = cur_probe;
589
- do {
590
- const uint64_t value = table[cur_probe];
591
- if (value == 0) {
592
- return false;
593
- } else if (value == hash) {
594
- return true;
595
- }
596
- cur_probe = (cur_probe + stride) & mask;
597
- } while (cur_probe != loop_index);
598
- throw std::logic_error("key not found and search wrapped");
224
+ template<typename A>
225
+ compact_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::compact(bool ordered) const {
226
+ return compact_theta_sketch_alloc<A>(*this, ordered);
599
227
  }
600
228
 
601
229
  template<typename A>
602
- typename theta_sketch_alloc<A>::const_iterator update_theta_sketch_alloc<A>::begin() const {
603
- return typename theta_sketch_alloc<A>::const_iterator(keys_.data(), keys_.size(), 0);
230
+ void update_theta_sketch_alloc<A>::print_specifics(ostrstream& os) const {
231
+ os << " lg nominal size : " << static_cast<int>(table_.lg_nom_size_) << std::endl;
232
+ os << " lg current size : " << static_cast<int>(table_.lg_cur_size_) << std::endl;
233
+ os << " resize factor : " << (1 << table_.rf_) << std::endl;
604
234
  }
605
235
 
236
+ // builder
237
+
606
238
  template<typename A>
607
- typename theta_sketch_alloc<A>::const_iterator update_theta_sketch_alloc<A>::end() const {
608
- return typename theta_sketch_alloc<A>::const_iterator(keys_.data(), keys_.size(), keys_.size());
239
+ update_theta_sketch_alloc<A>::builder::builder(const A& allocator): theta_base_builder<builder, A>(allocator) {}
240
+
241
+ template<typename A>
242
+ update_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::builder::build() const {
243
+ return update_theta_sketch_alloc(this->starting_lg_size(), this->lg_k_, this->rf_, this->starting_theta(), this->seed_, this->allocator_);
609
244
  }
610
245
 
611
246
  // compact sketch
612
247
 
613
248
  template<typename A>
614
- compact_theta_sketch_alloc<A>::compact_theta_sketch_alloc(bool is_empty, uint64_t theta, vector_u64<A>&& keys, uint16_t seed_hash, bool is_ordered):
615
- theta_sketch_alloc<A>(is_empty, theta),
616
- keys_(std::move(keys)),
249
+ compact_theta_sketch_alloc<A>::compact_theta_sketch_alloc(const Base& other, bool ordered):
250
+ is_empty_(other.is_empty()),
251
+ is_ordered_(other.is_ordered() || ordered),
252
+ seed_hash_(other.get_seed_hash()),
253
+ theta_(other.get_theta64()),
254
+ entries_(other.get_allocator())
255
+ {
256
+ entries_.reserve(other.get_num_retained());
257
+ std::copy(other.begin(), other.end(), std::back_inserter(entries_));
258
+ if (ordered && !other.is_ordered()) std::sort(entries_.begin(), entries_.end());
259
+ }
260
+
261
+ template<typename A>
262
+ compact_theta_sketch_alloc<A>::compact_theta_sketch_alloc(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta,
263
+ std::vector<uint64_t, A>&& entries):
264
+ is_empty_(is_empty),
265
+ is_ordered_(is_ordered),
617
266
  seed_hash_(seed_hash),
618
- is_ordered_(is_ordered)
267
+ theta_(theta),
268
+ entries_(std::move(entries))
619
269
  {}
620
270
 
621
271
  template<typename A>
622
- compact_theta_sketch_alloc<A>::compact_theta_sketch_alloc(const theta_sketch_alloc<A>& other, bool ordered):
623
- theta_sketch_alloc<A>(other),
624
- keys_(other.get_num_retained()),
625
- seed_hash_(other.get_seed_hash()),
626
- is_ordered_(other.is_ordered() || ordered)
627
- {
628
- std::copy(other.begin(), other.end(), keys_.begin());
629
- if (ordered && !other.is_ordered()) std::sort(keys_.begin(), keys_.end());
272
+ A compact_theta_sketch_alloc<A>::get_allocator() const {
273
+ return entries_.get_allocator();
274
+ }
275
+
276
+ template<typename A>
277
+ bool compact_theta_sketch_alloc<A>::is_empty() const {
278
+ return is_empty_;
279
+ }
280
+
281
+ template<typename A>
282
+ bool compact_theta_sketch_alloc<A>::is_ordered() const {
283
+ return is_ordered_;
284
+ }
285
+
286
+ template<typename A>
287
+ uint64_t compact_theta_sketch_alloc<A>::get_theta64() const {
288
+ return theta_;
630
289
  }
631
290
 
632
291
  template<typename A>
633
292
  uint32_t compact_theta_sketch_alloc<A>::get_num_retained() const {
634
- return keys_.size();
293
+ return entries_.size();
635
294
  }
636
295
 
637
296
  template<typename A>
@@ -640,158 +299,148 @@ uint16_t compact_theta_sketch_alloc<A>::get_seed_hash() const {
640
299
  }
641
300
 
642
301
  template<typename A>
643
- bool compact_theta_sketch_alloc<A>::is_ordered() const {
644
- return is_ordered_;
302
+ auto compact_theta_sketch_alloc<A>::begin() -> iterator {
303
+ return iterator(entries_.data(), entries_.size(), 0);
645
304
  }
646
305
 
647
306
  template<typename A>
648
- string<A> compact_theta_sketch_alloc<A>::to_string(bool print_items) const {
649
- std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
650
- os << "### Compact Theta sketch summary:" << std::endl;
651
- os << " num retained keys : " << keys_.size() << std::endl;
652
- os << " seed hash : " << this->get_seed_hash() << std::endl;
653
- os << " empty? : " << (this->is_empty() ? "true" : "false") << std::endl;
654
- os << " ordered? : " << (this->is_ordered() ? "true" : "false") << std::endl;
655
- os << " estimation mode? : " << (this->is_estimation_mode() ? "true" : "false") << std::endl;
656
- os << " theta (fraction) : " << this->get_theta() << std::endl;
657
- os << " theta (raw 64-bit) : " << this->theta_ << std::endl;
658
- os << " estimate : " << this->get_estimate() << std::endl;
659
- os << " lower bound 95% conf : " << this->get_lower_bound(2) << std::endl;
660
- os << " upper bound 95% conf : " << this->get_upper_bound(2) << std::endl;
661
- os << "### End sketch summary" << std::endl;
662
- if (print_items) {
663
- os << "### Retained keys" << std::endl;
664
- for (auto key: *this) os << " " << key << std::endl;
665
- os << "### End retained keys" << std::endl;
666
- }
667
- return os.str();
307
+ auto compact_theta_sketch_alloc<A>::end() -> iterator {
308
+ return iterator(nullptr, 0, entries_.size());
668
309
  }
669
310
 
311
+ template<typename A>
312
+ auto compact_theta_sketch_alloc<A>::begin() const -> const_iterator {
313
+ return const_iterator(entries_.data(), entries_.size(), 0);
314
+ }
315
+
316
+ template<typename A>
317
+ auto compact_theta_sketch_alloc<A>::end() const -> const_iterator {
318
+ return const_iterator(nullptr, 0, entries_.size());
319
+ }
320
+
321
+ template<typename A>
322
+ void compact_theta_sketch_alloc<A>::print_specifics(ostrstream&) const {}
323
+
670
324
  template<typename A>
671
325
  void compact_theta_sketch_alloc<A>::serialize(std::ostream& os) const {
672
- const bool is_single_item = keys_.size() == 1 && !this->is_estimation_mode();
326
+ const bool is_single_item = entries_.size() == 1 && !this->is_estimation_mode();
673
327
  const uint8_t preamble_longs = this->is_empty() || is_single_item ? 1 : this->is_estimation_mode() ? 3 : 2;
674
328
  os.write(reinterpret_cast<const char*>(&preamble_longs), sizeof(preamble_longs));
675
- const uint8_t serial_version = theta_sketch_alloc<A>::SERIAL_VERSION;
329
+ const uint8_t serial_version = SERIAL_VERSION;
676
330
  os.write(reinterpret_cast<const char*>(&serial_version), sizeof(serial_version));
677
331
  const uint8_t type = SKETCH_TYPE;
678
332
  os.write(reinterpret_cast<const char*>(&type), sizeof(type));
679
333
  const uint16_t unused16 = 0;
680
334
  os.write(reinterpret_cast<const char*>(&unused16), sizeof(unused16));
681
335
  const uint8_t flags_byte(
682
- (1 << theta_sketch_alloc<A>::flags::IS_COMPACT) |
683
- (1 << theta_sketch_alloc<A>::flags::IS_READ_ONLY) |
684
- (this->is_empty() ? 1 << theta_sketch_alloc<A>::flags::IS_EMPTY : 0) |
685
- (this->is_ordered() ? 1 << theta_sketch_alloc<A>::flags::IS_ORDERED : 0)
336
+ (1 << flags::IS_COMPACT) |
337
+ (1 << flags::IS_READ_ONLY) |
338
+ (this->is_empty() ? 1 << flags::IS_EMPTY : 0) |
339
+ (this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
686
340
  );
687
341
  os.write(reinterpret_cast<const char*>(&flags_byte), sizeof(flags_byte));
688
342
  const uint16_t seed_hash = get_seed_hash();
689
- os.write((char*)&seed_hash, sizeof(seed_hash));
343
+ os.write(reinterpret_cast<const char*>(&seed_hash), sizeof(seed_hash));
690
344
  if (!this->is_empty()) {
691
345
  if (!is_single_item) {
692
- const uint32_t num_keys = keys_.size();
693
- os.write((char*)&num_keys, sizeof(num_keys));
346
+ const uint32_t num_entries = entries_.size();
347
+ os.write(reinterpret_cast<const char*>(&num_entries), sizeof(num_entries));
694
348
  const uint32_t unused32 = 0;
695
- os.write((char*)&unused32, sizeof(unused32));
349
+ os.write(reinterpret_cast<const char*>(&unused32), sizeof(unused32));
696
350
  if (this->is_estimation_mode()) {
697
- os.write((char*)&(this->theta_), sizeof(uint64_t));
351
+ os.write(reinterpret_cast<const char*>(&(this->theta_)), sizeof(uint64_t));
698
352
  }
699
353
  }
700
- os.write((char*)keys_.data(), sizeof(uint64_t) * keys_.size());
354
+ os.write(reinterpret_cast<const char*>(entries_.data()), entries_.size() * sizeof(uint64_t));
701
355
  }
702
356
  }
703
357
 
704
358
  template<typename A>
705
- vector_u8<A> compact_theta_sketch_alloc<A>::serialize(unsigned header_size_bytes) const {
706
- const bool is_single_item = keys_.size() == 1 && !this->is_estimation_mode();
359
+ auto compact_theta_sketch_alloc<A>::serialize(unsigned header_size_bytes) const -> vector_bytes {
360
+ const bool is_single_item = entries_.size() == 1 && !this->is_estimation_mode();
707
361
  const uint8_t preamble_longs = this->is_empty() || is_single_item ? 1 : this->is_estimation_mode() ? 3 : 2;
708
- const size_t size = header_size_bytes + sizeof(uint64_t) * preamble_longs + sizeof(uint64_t) * keys_.size();
709
- vector_u8<A> bytes(size);
362
+ const size_t size = header_size_bytes + sizeof(uint64_t) * preamble_longs
363
+ + sizeof(uint64_t) * entries_.size();
364
+ vector_bytes bytes(size, 0, entries_.get_allocator());
710
365
  uint8_t* ptr = bytes.data() + header_size_bytes;
711
366
 
712
367
  ptr += copy_to_mem(&preamble_longs, ptr, sizeof(preamble_longs));
713
- const uint8_t serial_version = theta_sketch_alloc<A>::SERIAL_VERSION;
368
+ const uint8_t serial_version = SERIAL_VERSION;
714
369
  ptr += copy_to_mem(&serial_version, ptr, sizeof(serial_version));
715
370
  const uint8_t type = SKETCH_TYPE;
716
371
  ptr += copy_to_mem(&type, ptr, sizeof(type));
717
372
  const uint16_t unused16 = 0;
718
373
  ptr += copy_to_mem(&unused16, ptr, sizeof(unused16));
719
374
  const uint8_t flags_byte(
720
- (1 << theta_sketch_alloc<A>::flags::IS_COMPACT) |
721
- (1 << theta_sketch_alloc<A>::flags::IS_READ_ONLY) |
722
- (this->is_empty() ? 1 << theta_sketch_alloc<A>::flags::IS_EMPTY : 0) |
723
- (this->is_ordered() ? 1 << theta_sketch_alloc<A>::flags::IS_ORDERED : 0)
375
+ (1 << flags::IS_COMPACT) |
376
+ (1 << flags::IS_READ_ONLY) |
377
+ (this->is_empty() ? 1 << flags::IS_EMPTY : 0) |
378
+ (this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
724
379
  );
725
380
  ptr += copy_to_mem(&flags_byte, ptr, sizeof(flags_byte));
726
381
  const uint16_t seed_hash = get_seed_hash();
727
382
  ptr += copy_to_mem(&seed_hash, ptr, sizeof(seed_hash));
728
383
  if (!this->is_empty()) {
729
384
  if (!is_single_item) {
730
- const uint32_t num_keys = keys_.size();
731
- ptr += copy_to_mem(&num_keys, ptr, sizeof(num_keys));
385
+ const uint32_t num_entries = entries_.size();
386
+ ptr += copy_to_mem(&num_entries, ptr, sizeof(num_entries));
732
387
  const uint32_t unused32 = 0;
733
388
  ptr += copy_to_mem(&unused32, ptr, sizeof(unused32));
734
389
  if (this->is_estimation_mode()) {
735
- ptr += copy_to_mem(&(this->theta_), ptr, sizeof(uint64_t));
390
+ ptr += copy_to_mem(&theta_, ptr, sizeof(uint64_t));
736
391
  }
737
392
  }
738
- ptr += copy_to_mem(keys_.data(), ptr, sizeof(uint64_t) * keys_.size());
393
+ ptr += copy_to_mem(entries_.data(), ptr, entries_.size() * sizeof(uint64_t));
739
394
  }
740
-
741
395
  return bytes;
742
396
  }
743
397
 
744
398
  template<typename A>
745
- compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize(std::istream& is, uint64_t seed) {
399
+ compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize(std::istream& is, uint64_t seed, const A& allocator) {
746
400
  uint8_t preamble_longs;
747
- is.read((char*)&preamble_longs, sizeof(preamble_longs));
401
+ is.read(reinterpret_cast<char*>(&preamble_longs), sizeof(preamble_longs));
748
402
  uint8_t serial_version;
749
- is.read((char*)&serial_version, sizeof(serial_version));
403
+ is.read(reinterpret_cast<char*>(&serial_version), sizeof(serial_version));
750
404
  uint8_t type;
751
- is.read((char*)&type, sizeof(type));
405
+ is.read(reinterpret_cast<char*>(&type), sizeof(type));
752
406
  uint16_t unused16;
753
- is.read((char*)&unused16, sizeof(unused16));
407
+ is.read(reinterpret_cast<char*>(&unused16), sizeof(unused16));
754
408
  uint8_t flags_byte;
755
- is.read((char*)&flags_byte, sizeof(flags_byte));
409
+ is.read(reinterpret_cast<char*>(&flags_byte), sizeof(flags_byte));
756
410
  uint16_t seed_hash;
757
- is.read((char*)&seed_hash, sizeof(seed_hash));
758
- theta_sketch_alloc<A>::check_sketch_type(type, SKETCH_TYPE);
759
- theta_sketch_alloc<A>::check_serial_version(serial_version, theta_sketch_alloc<A>::SERIAL_VERSION);
760
- const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY);
761
- if (!is_empty) theta_sketch_alloc<A>::check_seed_hash(seed_hash, theta_sketch_alloc<A>::get_seed_hash(seed));
762
- return internal_deserialize(is, preamble_longs, flags_byte, seed_hash);
763
- }
764
-
765
- template<typename A>
766
- compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::internal_deserialize(std::istream& is, uint8_t preamble_longs, uint8_t flags_byte, uint16_t seed_hash) {
767
- uint64_t theta = theta_sketch_alloc<A>::MAX_THETA;
768
- uint32_t num_keys = 0;
769
-
770
- const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY);
411
+ is.read(reinterpret_cast<char*>(&seed_hash), sizeof(seed_hash));
412
+ checker<true>::check_sketch_type(type, SKETCH_TYPE);
413
+ checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
414
+ const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
415
+ if (!is_empty) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
416
+
417
+ uint64_t theta = theta_constants::MAX_THETA;
418
+ uint32_t num_entries = 0;
771
419
  if (!is_empty) {
772
420
  if (preamble_longs == 1) {
773
- num_keys = 1;
421
+ num_entries = 1;
774
422
  } else {
775
- is.read((char*)&num_keys, sizeof(num_keys));
423
+ is.read(reinterpret_cast<char*>(&num_entries), sizeof(num_entries));
776
424
  uint32_t unused32;
777
- is.read((char*)&unused32, sizeof(unused32));
425
+ is.read(reinterpret_cast<char*>(&unused32), sizeof(unused32));
778
426
  if (preamble_longs > 2) {
779
- is.read((char*)&theta, sizeof(theta));
427
+ is.read(reinterpret_cast<char*>(&theta), sizeof(theta));
780
428
  }
781
429
  }
782
430
  }
783
- vector_u64<A> keys(num_keys);
784
- if (!is_empty) is.read((char*)keys.data(), sizeof(uint64_t) * keys.size());
431
+ std::vector<uint64_t, A> entries(num_entries, 0, allocator);
432
+ if (!is_empty) is.read(reinterpret_cast<char*>(entries.data()), sizeof(uint64_t) * entries.size());
785
433
 
786
- const bool is_ordered = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_ORDERED);
787
- if (!is.good()) throw std::runtime_error("error reading from std::istream");
788
- return compact_theta_sketch_alloc<A>(is_empty, theta, std::move(keys), seed_hash, is_ordered);
434
+ const bool is_ordered = flags_byte & (1 << flags::IS_ORDERED);
435
+ if (!is.good()) throw std::runtime_error("error reading from std::istream");
436
+ return compact_theta_sketch_alloc(is_empty, is_ordered, seed_hash, theta, std::move(entries));
789
437
  }
790
438
 
791
439
  template<typename A>
792
- compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize(const void* bytes, size_t size, uint64_t seed) {
440
+ compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize(const void* bytes, size_t size, uint64_t seed, const A& allocator) {
793
441
  ensure_minimum_memory(size, 8);
794
442
  const char* ptr = static_cast<const char*>(bytes);
443
+ const char* base = ptr;
795
444
  uint8_t preamble_longs;
796
445
  ptr += copy_from_mem(ptr, &preamble_longs, sizeof(preamble_longs));
797
446
  uint8_t serial_version;
@@ -804,28 +453,19 @@ compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize(const v
804
453
  ptr += copy_from_mem(ptr, &flags_byte, sizeof(flags_byte));
805
454
  uint16_t seed_hash;
806
455
  ptr += copy_from_mem(ptr, &seed_hash, sizeof(seed_hash));
807
- theta_sketch_alloc<A>::check_sketch_type(type, SKETCH_TYPE);
808
- theta_sketch_alloc<A>::check_serial_version(serial_version, theta_sketch_alloc<A>::SERIAL_VERSION);
809
- const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY);
810
- if (!is_empty) theta_sketch_alloc<A>::check_seed_hash(seed_hash, theta_sketch_alloc<A>::get_seed_hash(seed));
811
- return internal_deserialize(ptr, size - (ptr - static_cast<const char*>(bytes)), preamble_longs, flags_byte, seed_hash);
812
- }
813
-
814
- template<typename A>
815
- compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::internal_deserialize(const void* bytes, size_t size, uint8_t preamble_longs, uint8_t flags_byte, uint16_t seed_hash) {
816
- const char* ptr = static_cast<const char*>(bytes);
817
- const char* base = ptr;
818
-
819
- uint64_t theta = theta_sketch_alloc<A>::MAX_THETA;
820
- uint32_t num_keys = 0;
456
+ checker<true>::check_sketch_type(type, SKETCH_TYPE);
457
+ checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
458
+ const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
459
+ if (!is_empty) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
821
460
 
822
- const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY);
461
+ uint64_t theta = theta_constants::MAX_THETA;
462
+ uint32_t num_entries = 0;
823
463
  if (!is_empty) {
824
464
  if (preamble_longs == 1) {
825
- num_keys = 1;
465
+ num_entries = 1;
826
466
  } else {
827
467
  ensure_minimum_memory(size, 8); // read the first prelong before this method
828
- ptr += copy_from_mem(ptr, &num_keys, sizeof(num_keys));
468
+ ptr += copy_from_mem(ptr, &num_entries, sizeof(num_entries));
829
469
  uint32_t unused32;
830
470
  ptr += copy_from_mem(ptr, &unused32, sizeof(unused32));
831
471
  if (preamble_longs > 2) {
@@ -834,106 +474,16 @@ compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::internal_deserializ
834
474
  }
835
475
  }
836
476
  }
837
- const size_t keys_size_bytes = sizeof(uint64_t) * num_keys;
838
- check_memory_size(ptr - base + keys_size_bytes, size);
839
- vector_u64<A> keys(num_keys);
840
- if (!is_empty) ptr += copy_from_mem(ptr, keys.data(), keys_size_bytes);
841
-
842
- const bool is_ordered = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_ORDERED);
843
- return compact_theta_sketch_alloc<A>(is_empty, theta, std::move(keys), seed_hash, is_ordered);
844
- }
845
-
846
- template<typename A>
847
- typename theta_sketch_alloc<A>::const_iterator compact_theta_sketch_alloc<A>::begin() const {
848
- return typename theta_sketch_alloc<A>::const_iterator(keys_.data(), keys_.size(), 0);
849
- }
850
-
851
- template<typename A>
852
- typename theta_sketch_alloc<A>::const_iterator compact_theta_sketch_alloc<A>::end() const {
853
- return typename theta_sketch_alloc<A>::const_iterator(keys_.data(), keys_.size(), keys_.size());
854
- }
855
-
856
- // builder
857
-
858
- template<typename A>
859
- update_theta_sketch_alloc<A>::builder::builder():
860
- lg_k_(DEFAULT_LG_K), rf_(DEFAULT_RESIZE_FACTOR), p_(1), seed_(DEFAULT_SEED) {}
861
-
862
- template<typename A>
863
- typename update_theta_sketch_alloc<A>::builder& update_theta_sketch_alloc<A>::builder::set_lg_k(uint8_t lg_k) {
864
- if (lg_k < MIN_LG_K) {
865
- throw std::invalid_argument("lg_k must not be less than " + std::to_string(MIN_LG_K) + ": " + std::to_string(lg_k));
866
- }
867
- lg_k_ = lg_k;
868
- return *this;
869
- }
870
-
871
- template<typename A>
872
- typename update_theta_sketch_alloc<A>::builder& update_theta_sketch_alloc<A>::builder::set_resize_factor(resize_factor rf) {
873
- rf_ = rf;
874
- return *this;
875
- }
477
+ const size_t entries_size_bytes = sizeof(uint64_t) * num_entries;
478
+ check_memory_size(ptr - base + entries_size_bytes, size);
479
+ std::vector<uint64_t, A> entries(num_entries, 0, allocator);
480
+ if (!is_empty) ptr += copy_from_mem(ptr, entries.data(), entries_size_bytes);
876
481
 
877
- template<typename A>
878
- typename update_theta_sketch_alloc<A>::builder& update_theta_sketch_alloc<A>::builder::set_p(float p) {
879
- p_ = p;
880
- return *this;
881
- }
882
-
883
- template<typename A>
884
- typename update_theta_sketch_alloc<A>::builder& update_theta_sketch_alloc<A>::builder::set_seed(uint64_t seed) {
885
- seed_ = seed;
886
- return *this;
887
- }
888
-
889
- template<typename A>
890
- uint8_t update_theta_sketch_alloc<A>::builder::starting_sub_multiple(uint8_t lg_tgt, uint8_t lg_min, uint8_t lg_rf) {
891
- return (lg_tgt <= lg_min) ? lg_min : (lg_rf == 0) ? lg_tgt : ((lg_tgt - lg_min) % lg_rf) + lg_min;
892
- }
893
-
894
- template<typename A>
895
- update_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::builder::build() const {
896
- return update_theta_sketch_alloc<A>(starting_sub_multiple(lg_k_ + 1, MIN_LG_K, static_cast<uint8_t>(rf_)), lg_k_, rf_, p_, seed_);
897
- }
898
-
899
- // iterator
900
-
901
- template<typename A>
902
- theta_sketch_alloc<A>::const_iterator::const_iterator(const uint64_t* keys, uint32_t size, uint32_t index):
903
- keys_(keys), size_(size), index_(index) {
904
- while (index_ < size_ && keys_[index_] == 0) ++index_;
905
- }
906
-
907
- template<typename A>
908
- typename theta_sketch_alloc<A>::const_iterator& theta_sketch_alloc<A>::const_iterator::operator++() {
909
- do {
910
- ++index_;
911
- } while (index_ < size_ && keys_[index_] == 0);
912
- return *this;
913
- }
914
-
915
- template<typename A>
916
- typename theta_sketch_alloc<A>::const_iterator theta_sketch_alloc<A>::const_iterator::operator++(int) {
917
- const_iterator tmp(*this);
918
- operator++();
919
- return tmp;
920
- }
921
-
922
- template<typename A>
923
- bool theta_sketch_alloc<A>::const_iterator::operator==(const const_iterator& other) const {
924
- return index_ == other.index_;
925
- }
926
-
927
- template<typename A>
928
- bool theta_sketch_alloc<A>::const_iterator::operator!=(const const_iterator& other) const {
929
- return index_ != other.index_;
930
- }
931
-
932
- template<typename A>
933
- uint64_t theta_sketch_alloc<A>::const_iterator::operator*() const {
934
- return keys_[index_];
482
+ const bool is_ordered = flags_byte & (1 << flags::IS_ORDERED);
483
+ return compact_theta_sketch_alloc(is_empty, is_ordered, seed_hash, theta, std::move(entries));
935
484
  }
936
485
 
937
486
  } /* namespace datasketches */
938
487
 
939
488
  #endif
489
+