datasketches 0.1.2 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (160) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/datasketches/cpc_wrapper.cpp +12 -13
  4. data/ext/datasketches/ext.cpp +1 -1
  5. data/ext/datasketches/ext.h +4 -0
  6. data/ext/datasketches/extconf.rb +1 -1
  7. data/ext/datasketches/fi_wrapper.cpp +6 -8
  8. data/ext/datasketches/hll_wrapper.cpp +13 -14
  9. data/ext/datasketches/kll_wrapper.cpp +28 -76
  10. data/ext/datasketches/theta_wrapper.cpp +27 -41
  11. data/ext/datasketches/vo_wrapper.cpp +4 -6
  12. data/lib/datasketches/version.rb +1 -1
  13. data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
  14. data/vendor/datasketches-cpp/README.md +4 -4
  15. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +7 -0
  16. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +12 -0
  17. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +24 -0
  18. data/vendor/datasketches-cpp/common/test/integration_test.cpp +77 -0
  19. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +9 -1
  20. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +3 -0
  21. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +2 -2
  22. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +28 -19
  23. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +8 -5
  24. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +19 -14
  25. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +2 -2
  26. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +6 -6
  27. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +0 -6
  28. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +3 -3
  29. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -3
  30. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +9 -9
  31. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -0
  32. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +237 -0
  33. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +15 -10
  34. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +40 -28
  35. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +19 -13
  36. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +140 -124
  37. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +15 -12
  38. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
  39. data/vendor/datasketches-cpp/hll/CMakeLists.txt +3 -0
  40. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +32 -57
  41. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +9 -8
  42. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +2 -2
  43. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +34 -48
  44. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +10 -10
  45. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +45 -77
  46. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +11 -12
  47. data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +2 -2
  48. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +2 -2
  49. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +15 -14
  50. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +1 -1
  51. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +10 -21
  52. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +2 -3
  53. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +10 -21
  54. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +2 -3
  55. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +28 -55
  56. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +8 -8
  57. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +9 -11
  58. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +2 -1
  59. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +34 -31
  60. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +3 -28
  61. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +1 -1
  62. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +1 -1
  63. data/vendor/datasketches-cpp/hll/include/hll.hpp +6 -34
  64. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +7 -7
  65. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +2 -2
  66. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +3 -3
  67. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +2 -2
  68. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +46 -50
  69. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +1 -1
  70. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +3 -3
  71. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +10 -3
  72. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +93 -75
  73. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +11 -10
  74. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +45 -42
  75. data/vendor/datasketches-cpp/python/CMakeLists.txt +2 -0
  76. data/vendor/datasketches-cpp/python/README.md +6 -3
  77. data/vendor/datasketches-cpp/python/src/datasketches.cpp +2 -0
  78. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -2
  79. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +3 -1
  80. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +246 -0
  81. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +36 -26
  82. data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -1
  83. data/vendor/datasketches-cpp/python/tests/kll_test.py +3 -3
  84. data/vendor/datasketches-cpp/python/tests/req_test.py +126 -0
  85. data/vendor/datasketches-cpp/python/tests/theta_test.py +28 -3
  86. data/vendor/datasketches-cpp/req/CMakeLists.txt +60 -0
  87. data/vendor/datasketches-cpp/{tuple/include/theta_a_not_b_experimental_impl.hpp → req/include/req_common.hpp} +17 -8
  88. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +137 -0
  89. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +501 -0
  90. data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +69 -0
  91. data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +60 -0
  92. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +395 -0
  93. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +810 -0
  94. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +43 -0
  95. data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
  96. data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
  97. data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
  98. data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
  99. data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
  100. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +128 -0
  101. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +494 -0
  102. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +10 -9
  103. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +82 -70
  104. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +5 -5
  105. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +7 -7
  106. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -0
  107. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +96 -0
  108. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -31
  109. data/vendor/datasketches-cpp/setup.py +5 -3
  110. data/vendor/datasketches-cpp/theta/CMakeLists.txt +30 -3
  111. data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_sampled_sets.hpp +2 -1
  112. data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_theta_sketched_sets.hpp +1 -1
  113. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +12 -29
  114. data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +5 -46
  115. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_comparators.hpp +0 -0
  116. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_constants.hpp +2 -0
  117. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_helpers.hpp +0 -0
  118. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +22 -29
  119. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base.hpp +0 -0
  120. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base_impl.hpp +0 -0
  121. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +8 -90
  122. data/vendor/datasketches-cpp/{tuple/test/theta_union_experimental_test.cpp → theta/include/theta_jaccard_similarity.hpp} +11 -18
  123. data/vendor/datasketches-cpp/{tuple/include/jaccard_similarity.hpp → theta/include/theta_jaccard_similarity_base.hpp} +6 -22
  124. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base.hpp +0 -0
  125. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base_impl.hpp +5 -0
  126. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +132 -266
  127. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +200 -650
  128. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +27 -60
  129. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base.hpp +1 -1
  130. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base_impl.hpp +5 -0
  131. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +13 -69
  132. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base.hpp +3 -19
  133. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base_impl.hpp +6 -1
  134. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  135. data/vendor/datasketches-cpp/{tuple → theta}/test/theta_jaccard_similarity_test.cpp +2 -3
  136. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +37 -234
  137. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +3 -35
  138. data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +38 -0
  139. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +28 -13
  140. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +6 -6
  141. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +1 -6
  142. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -4
  143. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -4
  144. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +2 -1
  145. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +2 -2
  146. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +1 -4
  147. metadata +43 -34
  148. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +0 -53
  149. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +0 -78
  150. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +0 -43
  151. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +0 -393
  152. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +0 -481
  153. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +0 -88
  154. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +0 -47
  155. data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +0 -250
  156. data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
  157. data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
  158. data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
  159. data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +0 -224
  160. data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +0 -247
@@ -1,481 +0,0 @@
1
- /*
2
- * Licensed to the Apache Software Foundation (ASF) under one
3
- * or more contributor license agreements. See the NOTICE file
4
- * distributed with this work for additional information
5
- * regarding copyright ownership. The ASF licenses this file
6
- * to you under the Apache License, Version 2.0 (the
7
- * "License"); you may not use this file except in compliance
8
- * with the License. You may obtain a copy of the License at
9
- *
10
- * http://www.apache.org/licenses/LICENSE-2.0
11
- *
12
- * Unless required by applicable law or agreed to in writing,
13
- * software distributed under the License is distributed on an
14
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
- * KIND, either express or implied. See the License for the
16
- * specific language governing permissions and limitations
17
- * under the License.
18
- */
19
-
20
- #include <sstream>
21
-
22
- #include "serde.hpp"
23
- #include "binomial_bounds.hpp"
24
- #include "theta_helpers.hpp"
25
-
26
- namespace datasketches {
27
-
28
- template<typename A>
29
- bool theta_sketch_experimental<A>::is_estimation_mode() const {
30
- return get_theta64() < theta_constants::MAX_THETA && !is_empty();
31
- }
32
-
33
- template<typename A>
34
- double theta_sketch_experimental<A>::get_theta() const {
35
- return static_cast<double>(get_theta64()) / theta_constants::MAX_THETA;
36
- }
37
-
38
- template<typename A>
39
- double theta_sketch_experimental<A>::get_estimate() const {
40
- return get_num_retained() / get_theta();
41
- }
42
-
43
- template<typename A>
44
- double theta_sketch_experimental<A>::get_lower_bound(uint8_t num_std_devs) const {
45
- if (!is_estimation_mode()) return get_num_retained();
46
- return binomial_bounds::get_lower_bound(get_num_retained(), get_theta(), num_std_devs);
47
- }
48
-
49
- template<typename A>
50
- double theta_sketch_experimental<A>::get_upper_bound(uint8_t num_std_devs) const {
51
- if (!is_estimation_mode()) return get_num_retained();
52
- return binomial_bounds::get_upper_bound(get_num_retained(), get_theta(), num_std_devs);
53
- }
54
-
55
- template<typename A>
56
- string<A> theta_sketch_experimental<A>::to_string(bool detail) const {
57
- std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
58
- os << "### Theta sketch summary:" << std::endl;
59
- os << " num retained entries : " << get_num_retained() << std::endl;
60
- os << " seed hash : " << get_seed_hash() << std::endl;
61
- os << " empty? : " << (is_empty() ? "true" : "false") << std::endl;
62
- os << " ordered? : " << (is_ordered() ? "true" : "false") << std::endl;
63
- os << " estimation mode? : " << (is_estimation_mode() ? "true" : "false") << std::endl;
64
- os << " theta (fraction) : " << get_theta() << std::endl;
65
- os << " theta (raw 64-bit) : " << get_theta64() << std::endl;
66
- os << " estimate : " << this->get_estimate() << std::endl;
67
- os << " lower bound 95% conf : " << this->get_lower_bound(2) << std::endl;
68
- os << " upper bound 95% conf : " << this->get_upper_bound(2) << std::endl;
69
- print_specifics(os);
70
- os << "### End sketch summary" << std::endl;
71
- if (detail) {
72
- os << "### Retained entries" << std::endl;
73
- for (const auto& hash: *this) {
74
- os << hash << std::endl;
75
- }
76
- os << "### End retained entries" << std::endl;
77
- }
78
- return os.str();
79
- }
80
-
81
- // update sketch
82
-
83
- template<typename A>
84
- update_theta_sketch_experimental<A>::update_theta_sketch_experimental(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf,
85
- uint64_t theta, uint64_t seed, const A& allocator):
86
- table_(lg_cur_size, lg_nom_size, rf, theta, seed, allocator)
87
- {}
88
-
89
- template<typename A>
90
- A update_theta_sketch_experimental<A>::get_allocator() const {
91
- return table_.allocator_;
92
- }
93
-
94
- template<typename A>
95
- bool update_theta_sketch_experimental<A>::is_empty() const {
96
- return table_.is_empty_;
97
- }
98
-
99
- template<typename A>
100
- bool update_theta_sketch_experimental<A>::is_ordered() const {
101
- return false;
102
- }
103
-
104
- template<typename A>
105
- uint64_t update_theta_sketch_experimental<A>::get_theta64() const {
106
- return table_.theta_;
107
- }
108
-
109
- template<typename A>
110
- uint32_t update_theta_sketch_experimental<A>::get_num_retained() const {
111
- return table_.num_entries_;
112
- }
113
-
114
- template<typename A>
115
- uint16_t update_theta_sketch_experimental<A>::get_seed_hash() const {
116
- return compute_seed_hash(table_.seed_);
117
- }
118
-
119
- template<typename A>
120
- uint8_t update_theta_sketch_experimental<A>::get_lg_k() const {
121
- return table_.lg_nom_size_;
122
- }
123
-
124
- template<typename A>
125
- auto update_theta_sketch_experimental<A>::get_rf() const -> resize_factor {
126
- return table_.rf_;
127
- }
128
-
129
- template<typename A>
130
- void update_theta_sketch_experimental<A>::update(uint64_t value) {
131
- update(&value, sizeof(value));
132
- }
133
-
134
- template<typename A>
135
- void update_theta_sketch_experimental<A>::update(int64_t value) {
136
- update(&value, sizeof(value));
137
- }
138
-
139
- template<typename A>
140
- void update_theta_sketch_experimental<A>::update(uint32_t value) {
141
- update(static_cast<int32_t>(value));
142
- }
143
-
144
- template<typename A>
145
- void update_theta_sketch_experimental<A>::update(int32_t value) {
146
- update(static_cast<int64_t>(value));
147
- }
148
-
149
- template<typename A>
150
- void update_theta_sketch_experimental<A>::update(uint16_t value) {
151
- update(static_cast<int16_t>(value));
152
- }
153
-
154
- template<typename A>
155
- void update_theta_sketch_experimental<A>::update(int16_t value) {
156
- update(static_cast<int64_t>(value));
157
- }
158
-
159
- template<typename A>
160
- void update_theta_sketch_experimental<A>::update(uint8_t value) {
161
- update(static_cast<int8_t>(value));
162
- }
163
-
164
- template<typename A>
165
- void update_theta_sketch_experimental<A>::update(int8_t value) {
166
- update(static_cast<int64_t>(value));
167
- }
168
-
169
- template<typename A>
170
- void update_theta_sketch_experimental<A>::update(double value) {
171
- update(canonical_double(value));
172
- }
173
-
174
- template<typename A>
175
- void update_theta_sketch_experimental<A>::update(float value) {
176
- update(static_cast<double>(value));
177
- }
178
-
179
- template<typename A>
180
- void update_theta_sketch_experimental<A>::update(const std::string& value) {
181
- if (value.empty()) return;
182
- update(value.c_str(), value.length());
183
- }
184
-
185
- template<typename A>
186
- void update_theta_sketch_experimental<A>::update(const void* data, size_t length) {
187
- const uint64_t hash = table_.hash_and_screen(data, length);
188
- if (hash == 0) return;
189
- auto result = table_.find(hash);
190
- if (!result.second) {
191
- table_.insert(result.first, hash);
192
- }
193
- }
194
-
195
- template<typename A>
196
- void update_theta_sketch_experimental<A>::trim() {
197
- table_.trim();
198
- }
199
-
200
- template<typename A>
201
- auto update_theta_sketch_experimental<A>::begin() -> iterator {
202
- return iterator(table_.entries_, 1 << table_.lg_cur_size_, 0);
203
- }
204
-
205
- template<typename A>
206
- auto update_theta_sketch_experimental<A>::end() -> iterator {
207
- return iterator(nullptr, 0, 1 << table_.lg_cur_size_);
208
- }
209
-
210
- template<typename A>
211
- auto update_theta_sketch_experimental<A>::begin() const -> const_iterator {
212
- return const_iterator(table_.entries_, 1 << table_.lg_cur_size_, 0);
213
- }
214
-
215
- template<typename A>
216
- auto update_theta_sketch_experimental<A>::end() const -> const_iterator {
217
- return const_iterator(nullptr, 0, 1 << table_.lg_cur_size_);
218
- }
219
- template<typename A>
220
- compact_theta_sketch_experimental<A> update_theta_sketch_experimental<A>::compact(bool ordered) const {
221
- return compact_theta_sketch_experimental<A>(*this, ordered);
222
- }
223
-
224
- template<typename A>
225
- void update_theta_sketch_experimental<A>::print_specifics(std::ostringstream& os) const {
226
- os << " lg nominal size : " << static_cast<int>(table_.lg_nom_size_) << std::endl;
227
- os << " lg current size : " << static_cast<int>(table_.lg_cur_size_) << std::endl;
228
- os << " resize factor : " << (1 << table_.rf_) << std::endl;
229
- }
230
-
231
- // builder
232
-
233
- template<typename A>
234
- update_theta_sketch_experimental<A>::builder::builder(const A& allocator): theta_base_builder<builder, A>(allocator) {}
235
-
236
- template<typename A>
237
- update_theta_sketch_experimental<A> update_theta_sketch_experimental<A>::builder::build() const {
238
- return update_theta_sketch_experimental(this->starting_lg_size(), this->lg_k_, this->rf_, this->starting_theta(), this->seed_, this->allocator_);
239
- }
240
-
241
- // experimental compact theta sketch
242
-
243
- template<typename A>
244
- compact_theta_sketch_experimental<A>::compact_theta_sketch_experimental(const Base& other, bool ordered):
245
- is_empty_(other.is_empty()),
246
- is_ordered_(other.is_ordered() || ordered),
247
- seed_hash_(other.get_seed_hash()),
248
- theta_(other.get_theta64()),
249
- entries_(other.get_allocator())
250
- {
251
- entries_.reserve(other.get_num_retained());
252
- std::copy(other.begin(), other.end(), std::back_inserter(entries_));
253
- if (ordered && !other.is_ordered()) std::sort(entries_.begin(), entries_.end());
254
- }
255
-
256
- template<typename A>
257
- compact_theta_sketch_experimental<A>::compact_theta_sketch_experimental(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta,
258
- std::vector<uint64_t, A>&& entries):
259
- is_empty_(is_empty),
260
- is_ordered_(is_ordered),
261
- seed_hash_(seed_hash),
262
- theta_(theta),
263
- entries_(std::move(entries))
264
- {}
265
-
266
- template<typename A>
267
- A compact_theta_sketch_experimental<A>::get_allocator() const {
268
- return entries_.get_allocator();
269
- }
270
-
271
- template<typename A>
272
- bool compact_theta_sketch_experimental<A>::is_empty() const {
273
- return is_empty_;
274
- }
275
-
276
- template<typename A>
277
- bool compact_theta_sketch_experimental<A>::is_ordered() const {
278
- return is_ordered_;
279
- }
280
-
281
- template<typename A>
282
- uint64_t compact_theta_sketch_experimental<A>::get_theta64() const {
283
- return theta_;
284
- }
285
-
286
- template<typename A>
287
- uint32_t compact_theta_sketch_experimental<A>::get_num_retained() const {
288
- return entries_.size();
289
- }
290
-
291
- template<typename A>
292
- uint16_t compact_theta_sketch_experimental<A>::get_seed_hash() const {
293
- return seed_hash_;
294
- }
295
-
296
- template<typename A>
297
- auto compact_theta_sketch_experimental<A>::begin() -> iterator {
298
- return iterator(entries_.data(), entries_.size(), 0);
299
- }
300
-
301
- template<typename A>
302
- auto compact_theta_sketch_experimental<A>::end() -> iterator {
303
- return iterator(nullptr, 0, entries_.size());
304
- }
305
-
306
- template<typename A>
307
- auto compact_theta_sketch_experimental<A>::begin() const -> const_iterator {
308
- return const_iterator(entries_.data(), entries_.size(), 0);
309
- }
310
-
311
- template<typename A>
312
- auto compact_theta_sketch_experimental<A>::end() const -> const_iterator {
313
- return const_iterator(nullptr, 0, entries_.size());
314
- }
315
-
316
- template<typename A>
317
- void compact_theta_sketch_experimental<A>::print_specifics(std::ostringstream&) const {}
318
-
319
- template<typename A>
320
- void compact_theta_sketch_experimental<A>::serialize(std::ostream& os) const {
321
- const bool is_single_item = entries_.size() == 1 && !this->is_estimation_mode();
322
- const uint8_t preamble_longs = this->is_empty() || is_single_item ? 1 : this->is_estimation_mode() ? 3 : 2;
323
- os.write(reinterpret_cast<const char*>(&preamble_longs), sizeof(preamble_longs));
324
- const uint8_t serial_version = SERIAL_VERSION;
325
- os.write(reinterpret_cast<const char*>(&serial_version), sizeof(serial_version));
326
- const uint8_t type = SKETCH_TYPE;
327
- os.write(reinterpret_cast<const char*>(&type), sizeof(type));
328
- const uint16_t unused16 = 0;
329
- os.write(reinterpret_cast<const char*>(&unused16), sizeof(unused16));
330
- const uint8_t flags_byte(
331
- (1 << flags::IS_COMPACT) |
332
- (1 << flags::IS_READ_ONLY) |
333
- (this->is_empty() ? 1 << flags::IS_EMPTY : 0) |
334
- (this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
335
- );
336
- os.write(reinterpret_cast<const char*>(&flags_byte), sizeof(flags_byte));
337
- const uint16_t seed_hash = get_seed_hash();
338
- os.write(reinterpret_cast<const char*>(&seed_hash), sizeof(seed_hash));
339
- if (!this->is_empty()) {
340
- if (!is_single_item) {
341
- const uint32_t num_entries = entries_.size();
342
- os.write(reinterpret_cast<const char*>(&num_entries), sizeof(num_entries));
343
- const uint32_t unused32 = 0;
344
- os.write(reinterpret_cast<const char*>(&unused32), sizeof(unused32));
345
- if (this->is_estimation_mode()) {
346
- os.write(reinterpret_cast<const char*>(&(this->theta_)), sizeof(uint64_t));
347
- }
348
- }
349
- os.write(reinterpret_cast<const char*>(entries_.data()), entries_.size() * sizeof(uint64_t));
350
- }
351
- }
352
-
353
- template<typename A>
354
- auto compact_theta_sketch_experimental<A>::serialize(unsigned header_size_bytes) const -> vector_bytes {
355
- const bool is_single_item = entries_.size() == 1 && !this->is_estimation_mode();
356
- const uint8_t preamble_longs = this->is_empty() || is_single_item ? 1 : this->is_estimation_mode() ? 3 : 2;
357
- const size_t size = header_size_bytes + sizeof(uint64_t) * preamble_longs
358
- + sizeof(uint64_t) * entries_.size();
359
- vector_bytes bytes(size, 0, entries_.get_allocator());
360
- uint8_t* ptr = bytes.data() + header_size_bytes;
361
-
362
- ptr += copy_to_mem(&preamble_longs, ptr, sizeof(preamble_longs));
363
- const uint8_t serial_version = SERIAL_VERSION;
364
- ptr += copy_to_mem(&serial_version, ptr, sizeof(serial_version));
365
- const uint8_t type = SKETCH_TYPE;
366
- ptr += copy_to_mem(&type, ptr, sizeof(type));
367
- const uint16_t unused16 = 0;
368
- ptr += copy_to_mem(&unused16, ptr, sizeof(unused16));
369
- const uint8_t flags_byte(
370
- (1 << flags::IS_COMPACT) |
371
- (1 << flags::IS_READ_ONLY) |
372
- (this->is_empty() ? 1 << flags::IS_EMPTY : 0) |
373
- (this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
374
- );
375
- ptr += copy_to_mem(&flags_byte, ptr, sizeof(flags_byte));
376
- const uint16_t seed_hash = get_seed_hash();
377
- ptr += copy_to_mem(&seed_hash, ptr, sizeof(seed_hash));
378
- if (!this->is_empty()) {
379
- if (!is_single_item) {
380
- const uint32_t num_entries = entries_.size();
381
- ptr += copy_to_mem(&num_entries, ptr, sizeof(num_entries));
382
- const uint32_t unused32 = 0;
383
- ptr += copy_to_mem(&unused32, ptr, sizeof(unused32));
384
- if (this->is_estimation_mode()) {
385
- ptr += copy_to_mem(&theta_, ptr, sizeof(uint64_t));
386
- }
387
- }
388
- ptr += copy_to_mem(entries_.data(), ptr, entries_.size() * sizeof(uint64_t));
389
- }
390
- return bytes;
391
- }
392
-
393
- template<typename A>
394
- compact_theta_sketch_experimental<A> compact_theta_sketch_experimental<A>::deserialize(std::istream& is, uint64_t seed, const A& allocator) {
395
- uint8_t preamble_longs;
396
- is.read(reinterpret_cast<char*>(&preamble_longs), sizeof(preamble_longs));
397
- uint8_t serial_version;
398
- is.read(reinterpret_cast<char*>(&serial_version), sizeof(serial_version));
399
- uint8_t type;
400
- is.read(reinterpret_cast<char*>(&type), sizeof(type));
401
- uint16_t unused16;
402
- is.read(reinterpret_cast<char*>(&unused16), sizeof(unused16));
403
- uint8_t flags_byte;
404
- is.read(reinterpret_cast<char*>(&flags_byte), sizeof(flags_byte));
405
- uint16_t seed_hash;
406
- is.read(reinterpret_cast<char*>(&seed_hash), sizeof(seed_hash));
407
- checker<true>::check_sketch_type(type, SKETCH_TYPE);
408
- checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
409
- const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
410
- if (!is_empty) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
411
-
412
- uint64_t theta = theta_constants::MAX_THETA;
413
- uint32_t num_entries = 0;
414
- if (!is_empty) {
415
- if (preamble_longs == 1) {
416
- num_entries = 1;
417
- } else {
418
- is.read(reinterpret_cast<char*>(&num_entries), sizeof(num_entries));
419
- uint32_t unused32;
420
- is.read(reinterpret_cast<char*>(&unused32), sizeof(unused32));
421
- if (preamble_longs > 2) {
422
- is.read(reinterpret_cast<char*>(&theta), sizeof(theta));
423
- }
424
- }
425
- }
426
- std::vector<uint64_t, A> entries(num_entries, 0, allocator);
427
- if (!is_empty) is.read(reinterpret_cast<char*>(entries.data()), sizeof(uint64_t) * entries.size());
428
-
429
- const bool is_ordered = flags_byte & (1 << flags::IS_ORDERED);
430
- if (!is.good()) throw std::runtime_error("error reading from std::istream");
431
- return compact_theta_sketch_experimental(is_empty, is_ordered, seed_hash, theta, std::move(entries));
432
- }
433
-
434
- template<typename A>
435
- compact_theta_sketch_experimental<A> compact_theta_sketch_experimental<A>::deserialize(const void* bytes, size_t size, uint64_t seed, const A& allocator) {
436
- ensure_minimum_memory(size, 8);
437
- const char* ptr = static_cast<const char*>(bytes);
438
- const char* base = ptr;
439
- uint8_t preamble_longs;
440
- ptr += copy_from_mem(ptr, &preamble_longs, sizeof(preamble_longs));
441
- uint8_t serial_version;
442
- ptr += copy_from_mem(ptr, &serial_version, sizeof(serial_version));
443
- uint8_t type;
444
- ptr += copy_from_mem(ptr, &type, sizeof(type));
445
- uint16_t unused16;
446
- ptr += copy_from_mem(ptr, &unused16, sizeof(unused16));
447
- uint8_t flags_byte;
448
- ptr += copy_from_mem(ptr, &flags_byte, sizeof(flags_byte));
449
- uint16_t seed_hash;
450
- ptr += copy_from_mem(ptr, &seed_hash, sizeof(seed_hash));
451
- checker<true>::check_sketch_type(type, SKETCH_TYPE);
452
- checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
453
- const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
454
- if (!is_empty) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
455
-
456
- uint64_t theta = theta_constants::MAX_THETA;
457
- uint32_t num_entries = 0;
458
- if (!is_empty) {
459
- if (preamble_longs == 1) {
460
- num_entries = 1;
461
- } else {
462
- ensure_minimum_memory(size, 8); // read the first prelong before this method
463
- ptr += copy_from_mem(ptr, &num_entries, sizeof(num_entries));
464
- uint32_t unused32;
465
- ptr += copy_from_mem(ptr, &unused32, sizeof(unused32));
466
- if (preamble_longs > 2) {
467
- ensure_minimum_memory(size, (preamble_longs - 1) << 3);
468
- ptr += copy_from_mem(ptr, &theta, sizeof(theta));
469
- }
470
- }
471
- }
472
- const size_t entries_size_bytes = sizeof(uint64_t) * num_entries;
473
- check_memory_size(ptr - base + entries_size_bytes, size);
474
- std::vector<uint64_t, A> entries(num_entries, 0, allocator);
475
- if (!is_empty) ptr += copy_from_mem(ptr, entries.data(), entries_size_bytes);
476
-
477
- const bool is_ordered = flags_byte & (1 << flags::IS_ORDERED);
478
- return compact_theta_sketch_experimental(is_empty, is_ordered, seed_hash, theta, std::move(entries));
479
- }
480
-
481
- } /* namespace datasketches */