datasketches 0.4.2 → 0.4.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (68) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +8 -0
  3. data/NOTICE +1 -1
  4. data/README.md +1 -1
  5. data/ext/datasketches/vo_wrapper.cpp +1 -1
  6. data/lib/datasketches/version.rb +1 -1
  7. data/vendor/datasketches-cpp/CMakeLists.txt +2 -0
  8. data/vendor/datasketches-cpp/LICENSE +35 -7
  9. data/vendor/datasketches-cpp/NOTICE +3 -3
  10. data/vendor/datasketches-cpp/README.md +2 -3
  11. data/vendor/datasketches-cpp/common/CMakeLists.txt +2 -3
  12. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +5 -6
  13. data/vendor/datasketches-cpp/common/include/common_defs.hpp +18 -0
  14. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +5 -7
  15. data/vendor/datasketches-cpp/common/include/xxhash64.h +202 -0
  16. data/vendor/datasketches-cpp/count/CMakeLists.txt +0 -1
  17. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +0 -1
  18. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +7 -1
  19. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +10 -0
  20. data/vendor/datasketches-cpp/density/CMakeLists.txt +0 -1
  21. data/vendor/datasketches-cpp/fi/CMakeLists.txt +0 -1
  22. data/vendor/datasketches-cpp/filters/CMakeLists.txt +43 -0
  23. data/vendor/datasketches-cpp/filters/include/bit_array_ops.hpp +180 -0
  24. data/vendor/datasketches-cpp/filters/include/bloom_filter.hpp +753 -0
  25. data/vendor/datasketches-cpp/filters/include/bloom_filter_builder_impl.hpp +132 -0
  26. data/vendor/datasketches-cpp/filters/include/bloom_filter_impl.hpp +908 -0
  27. data/vendor/datasketches-cpp/filters/test/CMakeLists.txt +60 -0
  28. data/vendor/datasketches-cpp/filters/test/bit_array_ops_test.cpp +107 -0
  29. data/vendor/datasketches-cpp/filters/test/bloom_filter_allocation_test.cpp +75 -0
  30. data/vendor/datasketches-cpp/filters/test/bloom_filter_deserialize_from_java_test.cpp +51 -0
  31. data/vendor/datasketches-cpp/filters/test/bloom_filter_serialize_for_java.cpp +45 -0
  32. data/vendor/datasketches-cpp/filters/test/bloom_filter_test.cpp +406 -0
  33. data/vendor/datasketches-cpp/hll/CMakeLists.txt +0 -1
  34. data/vendor/datasketches-cpp/kll/CMakeLists.txt +0 -1
  35. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +6 -5
  36. data/vendor/datasketches-cpp/quantiles/CMakeLists.txt +0 -1
  37. data/vendor/datasketches-cpp/req/CMakeLists.txt +0 -1
  38. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +0 -1
  39. data/vendor/datasketches-cpp/sampling/include/ebpps_sample.hpp +4 -4
  40. data/vendor/datasketches-cpp/sampling/include/ebpps_sample_impl.hpp +13 -16
  41. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch.hpp +3 -1
  42. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch_impl.hpp +10 -11
  43. data/vendor/datasketches-cpp/sampling/test/ebpps_sample_test.cpp +7 -4
  44. data/vendor/datasketches-cpp/tdigest/CMakeLists.txt +41 -0
  45. data/vendor/datasketches-cpp/tdigest/include/tdigest.hpp +304 -0
  46. data/vendor/datasketches-cpp/tdigest/include/tdigest_impl.hpp +632 -0
  47. data/vendor/datasketches-cpp/tdigest/test/CMakeLists.txt +56 -0
  48. data/vendor/datasketches-cpp/tdigest/test/tdigest_custom_allocator_test.cpp +43 -0
  49. data/vendor/datasketches-cpp/tdigest/test/tdigest_deserialize_from_java_test.cpp +54 -0
  50. data/vendor/datasketches-cpp/tdigest/test/tdigest_ref_k100_n10000_double.sk +0 -0
  51. data/vendor/datasketches-cpp/tdigest/test/tdigest_ref_k100_n10000_float.sk +0 -0
  52. data/vendor/datasketches-cpp/tdigest/test/tdigest_serialize_for_java.cpp +67 -0
  53. data/vendor/datasketches-cpp/tdigest/test/tdigest_test.cpp +456 -0
  54. data/vendor/datasketches-cpp/theta/CMakeLists.txt +0 -1
  55. data/vendor/datasketches-cpp/theta/include/bit_packing.hpp +5 -5
  56. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +1 -1
  57. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +18 -1
  58. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +45 -21
  59. data/vendor/datasketches-cpp/theta/test/bit_packing_test.cpp +41 -38
  60. data/vendor/datasketches-cpp/theta/test/theta_sketch_deserialize_from_java_test.cpp +17 -0
  61. data/vendor/datasketches-cpp/theta/test/theta_sketch_serialize_for_java.cpp +1 -1
  62. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +73 -2
  63. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +0 -1
  64. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +28 -1
  65. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +33 -0
  66. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +61 -0
  67. data/vendor/datasketches-cpp/version.cfg.in +1 -1
  68. metadata +27 -9
@@ -0,0 +1,180 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef _BIT_ARRAY_OPS_HPP_
21
+ #define _BIT_ARRAY_OPS_HPP_
22
+
23
#include <bitset>
#include <cstdint>
#include <cstring>
24
+
25
namespace datasketches {

/**
 * This namespace comprises functions that operate on one or more arrays of bits (uint8_t*) to
 * provide bit array operations. The functions do not take ownership of memory and operate on
 * arrays in-place. Sizes of the arrays, in bytes, are passed in as arguments.
 *
 * None of the functions in this namespace perform bounds checks. The caller is responsible for
 * ensuring that indices are within the array bounds.
 *
 * Bit i is stored in byte (i >> 3) at bit position (i & 7), counting from the least
 * significant bit of that byte.
 */
namespace bit_array_ops {

/**
 * Get the value of a bit at the given index.
 * @param array the array of bits
 * @param index the index of the bit to get
 * @return the value of the bit at the given index.
 */
static inline bool get_bit(const uint8_t* array, uint64_t index) {
  const uint8_t mask = static_cast<uint8_t>(1u << (index & 7));
  return (array[index >> 3] & mask) != 0;
}

/**
 * Set the bit at the given index to 1.
 * @param array the array of bits
 * @param index the index of the bit to set.
 */
static inline void set_bit(uint8_t* array, uint64_t index) {
  const uint8_t mask = static_cast<uint8_t>(1u << (index & 7));
  array[index >> 3] |= mask;
}

/**
 * Set the bit at the given index to 0.
 * @param array the array of bits
 * @param index the index of the bit to clear.
 */
static inline void clear_bit(uint8_t* array, uint64_t index) {
  const uint8_t mask = static_cast<uint8_t>(1u << (index & 7));
  // ~mask is computed as int after promotion; cast back so only the low byte is applied
  array[index >> 3] &= static_cast<uint8_t>(~mask);
}

/**
 * Assign the value of the bit at the given index.
 * @param array the array of bits
 * @param index the index of the bit to assign.
 * @param value the new value of the bit: true sets it to 1, false clears it to 0.
 */
static inline void assign_bit(uint8_t* array, uint64_t index, bool value) {
  if (value) {
    set_bit(array, index);
  } else {
    clear_bit(array, index);
  }
}

/**
 * Gets the value of a bit at the specified index and sets it to true.
 * @param array the array of bits
 * @param index the index of the bit to get and set
 * @return the value the bit had before this call
 */
static inline bool get_and_set_bit(uint8_t* array, uint64_t index) {
  const uint64_t offset = index >> 3;
  const uint8_t mask = static_cast<uint8_t>(1u << (index & 7));
  if ((array[offset] & mask) != 0) {
    return true;
  }
  array[offset] |= mask;
  return false;
}

/**
 * @brief Gets the number of bits set in the bit array.
 *
 * The array is processed in 64-bit chunks; any trailing bytes beyond the last
 * full chunk are counted one byte at a time, so every byte of the array
 * contributes to the result.
 *
 * @param array the array of bits
 * @param length_bytes the length of the array, in bytes
 * @return the number of bits set in the bit array.
 */
static inline uint64_t count_num_bits_set(const uint8_t* array, uint64_t length_bytes) {
  uint64_t num_bits_set = 0;

  const uint64_t num_longs = length_bytes / sizeof(uint64_t);
  for (uint64_t i = 0; i < num_longs; ++i) {
    // copy into a local word instead of casting the pointer: the byte buffer
    // is not guaranteed to be 8-byte aligned, and reading it through a
    // uint64_t* would also violate the aliasing rules
    uint64_t chunk;
    std::memcpy(&chunk, array + i * sizeof(uint64_t), sizeof(chunk));
    num_bits_set += std::bitset<64>(chunk).count();
  }

  // remaining bytes when length_bytes is not a multiple of 8
  for (uint64_t i = num_longs * sizeof(uint64_t); i < length_bytes; ++i) {
    num_bits_set += std::bitset<8>(array[i]).count();
  }
  return num_bits_set;
}

/**
 * Performs a union operation on one bit array with another bit array.
 * This operation modifies the tgt bit array to be the union of its original bits and the bits of the src array.
 * The union operation is equivalent to a bitwise OR operation between the two arrays.
 *
 * @param tgt the array of bits into which the results are written
 * @param src the array of bits to union into tgt
 * @param length_bytes the length of the two arrays, in bytes
 * @return the number of bits set in the resulting array
 */
static inline uint64_t union_with(uint8_t* tgt, const uint8_t* src, uint64_t length_bytes) {
  uint64_t num_bits_set = 0;
  for (uint64_t i = 0; i < length_bytes; ++i) {
    tgt[i] |= src[i];
    num_bits_set += std::bitset<8>(tgt[i]).count();
  }
  return num_bits_set;
}

/**
 * Performs an intersection operation on one bit array with another bit array.
 * This operation modifies the tgt bit array to contain only the bits that are set in both that array and the src array.
 * The intersection operation is equivalent to a bitwise AND operation between the two arrays.
 *
 * @param tgt the array of bits into which the results are written
 * @param src the array of bits to intersect with tgt
 * @param length_bytes the length of the two arrays, in bytes
 * @return the number of bits set in the resulting array
 */
static inline uint64_t intersect(uint8_t* tgt, const uint8_t* src, uint64_t length_bytes) {
  uint64_t num_bits_set = 0;
  for (uint64_t i = 0; i < length_bytes; ++i) {
    tgt[i] &= src[i];
    num_bits_set += std::bitset<8>(tgt[i]).count();
  }
  return num_bits_set;
}

/**
 * Inverts the bits of this bit array.
 * This operation modifies the bit array by flipping all its bits; 0s become 1s and 1s become 0s.
 * @param array the array of bits
 * @param length_bytes the length of the array, in bytes
 * @return the number of bits set in the resulting array
 */
static inline uint64_t invert(uint8_t* array, uint64_t length_bytes) {
  uint64_t num_bits_set = 0;
  for (uint64_t i = 0; i < length_bytes; ++i) {
    // ~ promotes to int; cast back to keep only the flipped byte
    array[i] = static_cast<uint8_t>(~array[i]);
    num_bits_set += std::bitset<8>(array[i]).count();
  }
  return num_bits_set;
}

} // namespace bit_array_ops

} // namespace datasketches
179
+
180
+ #endif // _BIT_ARRAY_OPS_HPP_