datasketches 0.2.2 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (34) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/LICENSE +40 -3
  4. data/NOTICE +1 -1
  5. data/lib/datasketches/version.rb +1 -1
  6. data/vendor/datasketches-cpp/CMakeLists.txt +2 -0
  7. data/vendor/datasketches-cpp/LICENSE +40 -3
  8. data/vendor/datasketches-cpp/NOTICE +1 -1
  9. data/vendor/datasketches-cpp/common/include/common_defs.hpp +2 -0
  10. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +10 -3
  11. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +1 -1
  12. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +1 -1
  13. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +1 -1
  14. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +7 -2
  15. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +1 -1
  16. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +65 -1
  17. data/vendor/datasketches-cpp/pyproject.toml +4 -2
  18. data/vendor/datasketches-cpp/python/CMakeLists.txt +10 -6
  19. data/vendor/datasketches-cpp/python/README.md +50 -50
  20. data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
  21. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
  22. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +1 -1
  23. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +2 -2
  24. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +11 -5
  25. data/vendor/datasketches-cpp/python/tests/kll_test.py +2 -2
  26. data/vendor/datasketches-cpp/python/tests/req_test.py +2 -2
  27. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
  28. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +10 -5
  29. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +1 -1
  30. data/vendor/datasketches-cpp/setup.py +10 -7
  31. data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +9 -4
  32. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +5 -2
  33. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -1
  34. metadata +3 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5c578044053c564421893cc4433f7fe557f23ba9d8a1995fc2a2c5f07742721a
4
- data.tar.gz: f4122bd75e19fede015b01a5e5ad8e6130f75babe9c9160cc56f378480a16cee
3
+ metadata.gz: ea00e444de6dc1bebc2b8cf878a250f08717d55eaa55f63f6bec28f4be2af00d
4
+ data.tar.gz: 161b9089e3b8d0dbd99cfb6cc0af463934c42ba85f4788a08306369966f28571
5
5
  SHA512:
6
- metadata.gz: 2d7c4d7306f28356557a816a78033b909561ccd8f843281a2b756e88cbdcb9936da7995ff80871a19e229675ead812aca00d6c639d63a6532998c3c1b35aa953
7
- data.tar.gz: fdf0fe1d14e04bfddef9df1ae7958f6571a7f689865aa02e81713d4b250afeeeb8c90a168ce855728d9c831aff6d3ea71df91c71b7269a760c19488c42c92658
6
+ metadata.gz: '09eede1e6e4c0fe57c0116c4e8873670192fea845783687ca34890bd9358af9dd19a535774ab7dd667055cf6acd0d3913f044dcf2274e0ec092b33307250a74a'
7
+ data.tar.gz: b8bcaeb7af0d27e836f21941663229a2750922914c4f31f4ffbd6e3c3876320f9ce92916eb9730e02227b87e8f244bc08e5bc38541bf4ef4e3485203fff01942
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 0.2.3 (2021-09-29)
2
+
3
+ - Updated DataSketches to 3.2.0
4
+
1
5
  ## 0.2.2 (2021-07-17)
2
6
 
3
7
  - Updated DataSketches to 3.1.0
data/LICENSE CHANGED
@@ -284,11 +284,48 @@ APPENDIX B: Additional licenses relevant to this product.
284
284
  ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
285
285
  DEALINGS IN THE SOFTWARE.
286
286
  -------------------------------------------------------------
287
- Code Locations
287
+ Code Locations:
288
288
  * https://github.com/apache/datasketches-cpp/blob/master/common/test/catch.hpp
289
289
  that is adapted from the above.
290
290
 
291
291
 
292
+ =============================================================
293
+ BSD License
294
+ =============================================================
295
+ Original source code:
296
+ https://github.com/pybind/pybind11/blob/master/LICENSE
297
+
298
+ Copyright (c) 2016 Wenzel Jakob <wenzel.jakob@epfl.ch>, All rights reserved.
299
+
300
+ Redistribution and use in source and binary forms, with or without
301
+ modification, are permitted provided that the following conditions are met:
302
+
303
+ 1. Redistributions of source code must retain the above copyright notice, this
304
+ list of conditions and the following disclaimer.
305
+
306
+ 2. Redistributions in binary form must reproduce the above copyright notice,
307
+ this list of conditions and the following disclaimer in the documentation
308
+ and/or other materials provided with the distribution.
309
+
310
+ 3. Neither the name of the copyright holder nor the names of its contributors
311
+ may be used to endorse or promote products derived from this software
312
+ without specific prior written permission.
313
+
314
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
315
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
316
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
317
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
318
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
319
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
320
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
321
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
322
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
323
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
324
+ -------------------------------------------------------------
325
+ Code Locations:
326
+ Found only in the convenience binaries distributed from PyPI, which rely
327
+ on pybind11 code during compilation.
328
+
292
329
 
293
330
  =============================================================
294
331
  Public Domain
@@ -297,7 +334,7 @@ APPENDIX B: Additional licenses relevant to this product.
297
334
  https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp
298
335
  Placed in the Public Domain by Austin Appleby
299
336
 
300
- Code Locations
337
+ Code Locations:
301
338
  common/include/MurmurHash3.h
302
339
  that is adapted from the above.
303
340
  -------------------------------------------------------------
@@ -305,6 +342,6 @@ APPENDIX B: Additional licenses relevant to this product.
305
342
  * https://graphics.stanford.edu/~seander/bithacks.html
306
343
  * Placed in the Public Domain by Sean Eron Anderson
307
344
 
308
- Code Locations
345
+ Code Locations:
309
346
  * common/include/ceiling_power_of_2.hpp
310
347
  that is adapted from the above.
data/NOTICE CHANGED
@@ -1,5 +1,5 @@
1
1
  Apache DataSketches-cpp
2
- Copyright 2020 The Apache Software Foundation
2
+ Copyright 2020-2021 The Apache Software Foundation
3
3
 
4
4
  Copyright 2015-2018 Yahoo
5
5
  Copyright 2019 Verizon Media
@@ -1,3 +1,3 @@
1
1
  module DataSketches
2
- VERSION = "0.2.2"
2
+ VERSION = "0.2.3"
3
3
  end
@@ -35,6 +35,8 @@ set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
35
35
  #set(CMAKE_VERBOSE_MAKEFILE ON)
36
36
  set(CMAKE_MACOSX_RPATH ON)
37
37
 
38
+ set(CMAKE_CXX_STANDARD 11)
39
+
38
40
  # enable compiler warnings globally
39
41
  # derived from https://foonathan.net/blog/2018/10/17/cmake-warnings.html
40
42
  # and https://arne-mertz.de/2018/07/cmake-properties-options/
@@ -284,11 +284,48 @@ APPENDIX B: Additional licenses relevant to this product.
284
284
  ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
285
285
  DEALINGS IN THE SOFTWARE.
286
286
  -------------------------------------------------------------
287
- Code Locations
287
+ Code Locations:
288
288
  * https://github.com/apache/datasketches-cpp/blob/master/common/test/catch.hpp
289
289
  that is adapted from the above.
290
290
 
291
291
 
292
+ =============================================================
293
+ BSD License
294
+ =============================================================
295
+ Original source code:
296
+ https://github.com/pybind/pybind11/blob/master/LICENSE
297
+
298
+ Copyright (c) 2016 Wenzel Jakob <wenzel.jakob@epfl.ch>, All rights reserved.
299
+
300
+ Redistribution and use in source and binary forms, with or without
301
+ modification, are permitted provided that the following conditions are met:
302
+
303
+ 1. Redistributions of source code must retain the above copyright notice, this
304
+ list of conditions and the following disclaimer.
305
+
306
+ 2. Redistributions in binary form must reproduce the above copyright notice,
307
+ this list of conditions and the following disclaimer in the documentation
308
+ and/or other materials provided with the distribution.
309
+
310
+ 3. Neither the name of the copyright holder nor the names of its contributors
311
+ may be used to endorse or promote products derived from this software
312
+ without specific prior written permission.
313
+
314
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
315
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
316
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
317
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
318
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
319
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
320
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
321
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
322
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
323
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
324
+ -------------------------------------------------------------
325
+ Code Locations:
326
+ Found only in the convenience binaries distributed from PyPI, which rely
327
+ on pybind11 code during compilation.
328
+
292
329
 
293
330
  =============================================================
294
331
  Public Domain
@@ -297,7 +334,7 @@ APPENDIX B: Additional licenses relevant to this product.
297
334
  https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp
298
335
  Placed in the Public Domain by Austin Appleby
299
336
 
300
- Code Locations
337
+ Code Locations:
301
338
  common/include/MurmurHash3.h
302
339
  that is adapted from the above.
303
340
  -------------------------------------------------------------
@@ -305,7 +342,7 @@ APPENDIX B: Additional licenses relevant to this product.
305
342
  * https://graphics.stanford.edu/~seander/bithacks.html
306
343
  * Placed in the Public Domain by Sean Eron Anderson
307
344
 
308
- Code Locations
345
+ Code Locations:
309
346
  * common/include/ceiling_power_of_2.hpp
310
347
  that is adapted from the above.
311
348
 
@@ -1,5 +1,5 @@
1
1
  Apache DataSketches-cpp
2
- Copyright 2020 The Apache Software Foundation
2
+ Copyright 2020-2021 The Apache Software Foundation
3
3
 
4
4
  Copyright 2015-2018 Yahoo
5
5
  Copyright 2019 Verizon Media
@@ -29,6 +29,8 @@ namespace datasketches {
29
29
 
30
30
  static const uint64_t DEFAULT_SEED = 9001;
31
31
 
32
+ enum resize_factor { X1 = 0, X2, X4, X8 };
33
+
32
34
  template<typename A> using AllocChar = typename std::allocator_traits<A>::template rebind_alloc<char>;
33
35
  template<typename A> using string = std::basic_string<char, std::char_traits<char>, AllocChar<A>>;
34
36
 
@@ -26,9 +26,16 @@
26
26
 
27
27
  namespace datasketches {
28
28
 
29
- static const uint8_t CPC_MIN_LG_K = 4;
30
- static const uint8_t CPC_MAX_LG_K = 26;
31
- static const uint8_t CPC_DEFAULT_LG_K = 11;
29
+ namespace cpc_constants {
30
+ const uint8_t MIN_LG_K = 4;
31
+ const uint8_t MAX_LG_K = 26;
32
+ const uint8_t DEFAULT_LG_K = 11;
33
+ }
34
+
35
+ // TODO: Redundant and deprecated. Will be removed in next major version release.
36
+ static const uint8_t CPC_MIN_LG_K = cpc_constants::MIN_LG_K;
37
+ static const uint8_t CPC_MAX_LG_K = cpc_constants::MAX_LG_K;
38
+ static const uint8_t CPC_DEFAULT_LG_K = cpc_constants::DEFAULT_LG_K;
32
39
 
33
40
  template<typename A> using AllocU8 = typename std::allocator_traits<A>::template rebind_alloc<uint8_t>;
34
41
  template<typename A> using AllocU16 = typename std::allocator_traits<A>::template rebind_alloc<uint16_t>;
@@ -67,7 +67,7 @@ public:
67
67
  * @param lg_k base 2 logarithm of the number of bins in the sketch
68
68
  * @param seed for hash function
69
69
  */
70
- explicit cpc_sketch_alloc(uint8_t lg_k = CPC_DEFAULT_LG_K, uint64_t seed = DEFAULT_SEED, const A& allocator = A());
70
+ explicit cpc_sketch_alloc(uint8_t lg_k = cpc_constants::DEFAULT_LG_K, uint64_t seed = DEFAULT_SEED, const A& allocator = A());
71
71
 
72
72
  using allocator_type = A;
73
73
  A get_allocator() const;
@@ -45,7 +45,7 @@ public:
45
45
  * @param lg_k base 2 logarithm of the number of bins in the sketch
46
46
  * @param seed for hash function
47
47
  */
48
- explicit cpc_union_alloc(uint8_t lg_k = CPC_DEFAULT_LG_K, uint64_t seed = DEFAULT_SEED, const A& allocator = A());
48
+ explicit cpc_union_alloc(uint8_t lg_k = cpc_constants::DEFAULT_LG_K, uint64_t seed = DEFAULT_SEED, const A& allocator = A());
49
49
 
50
50
  cpc_union_alloc(const cpc_union_alloc<A>& other);
51
51
  cpc_union_alloc(cpc_union_alloc<A>&& other) noexcept;
@@ -350,7 +350,7 @@ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>:
350
350
  check_serial_version(serial_version);
351
351
  check_family_id(family_id);
352
352
  check_size(lg_cur_size, lg_max_size);
353
- ensure_minimum_memory(size, 1ULL << preamble_longs);
353
+ ensure_minimum_memory(size, preamble_longs * sizeof(uint64_t));
354
354
 
355
355
  frequent_items_sketch<T, W, H, E, S, A> sketch(lg_max_size, lg_cur_size, allocator);
356
356
  if (!is_empty) {
@@ -153,6 +153,10 @@ template<typename A> using vector_u32 = std::vector<uint32_t, AllocU32<A>>;
153
153
  template<typename A> using AllocD = typename std::allocator_traits<A>::template rebind_alloc<double>;
154
154
  template<typename A> using vector_d = std::vector<double, AllocD<A>>;
155
155
 
156
+ namespace kll_constants {
157
+ const uint16_t DEFAULT_K = 200;
158
+ }
159
+
156
160
  template <typename T, typename C = std::less<T>, typename S = serde<T>, typename A = std::allocator<T>>
157
161
  class kll_sketch {
158
162
  public:
@@ -160,11 +164,12 @@ class kll_sketch {
160
164
  using comparator = C;
161
165
 
162
166
  static const uint8_t DEFAULT_M = 8;
163
- static const uint16_t DEFAULT_K = 200;
167
+ // TODO: Redundant and deprecated. Will be removed in next major version.
168
+ static const uint16_t DEFAULT_K = kll_constants::DEFAULT_K;
164
169
  static const uint16_t MIN_K = DEFAULT_M;
165
170
  static const uint16_t MAX_K = (1 << 16) - 1;
166
171
 
167
- explicit kll_sketch(uint16_t k = DEFAULT_K, const A& allocator = A());
172
+ explicit kll_sketch(uint16_t k = kll_constants::DEFAULT_K, const A& allocator = A());
168
173
  kll_sketch(const kll_sketch& other);
169
174
  kll_sketch(kll_sketch&& other) noexcept;
170
175
  ~kll_sketch();
@@ -575,7 +575,7 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, si
575
575
  check_preamble_ints(preamble_ints, flags_byte);
576
576
  check_serial_version(serial_version);
577
577
  check_family_id(family_id);
578
- ensure_minimum_memory(size, 1ULL << preamble_ints);
578
+ ensure_minimum_memory(size, preamble_ints * sizeof(uint32_t));
579
579
 
580
580
  const bool is_empty(flags_byte & (1 << flags::IS_EMPTY));
581
581
  if (is_empty) return kll_sketch<T, C, S, A>(k, allocator);
@@ -279,6 +279,7 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
279
279
  REQUIRE(static_cast<size_t>(s.tellp()) == sketch.get_serialized_size_bytes());
280
280
  auto sketch2 = kll_float_sketch::deserialize(s, test_allocator<float>(0));
281
281
  REQUIRE(static_cast<size_t>(s.tellp()) == sketch2.get_serialized_size_bytes());
282
+ REQUIRE(s.tellg() == s.tellp());
282
283
  REQUIRE(sketch2.is_empty() == sketch.is_empty());
283
284
  REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
284
285
  REQUIRE(sketch2.get_n() == sketch.get_n());
@@ -304,7 +305,7 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
304
305
  REQUIRE(sketch2.get_normalized_rank_error(true) == sketch.get_normalized_rank_error(true));
305
306
  }
306
307
 
307
- SECTION("serialize deserialize one item") {
308
+ SECTION("stream serialize deserialize one item") {
308
309
  kll_float_sketch sketch(200, 0);
309
310
  sketch.update(1.0f);
310
311
  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
@@ -324,6 +325,24 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
324
325
  REQUIRE(sketch2.get_rank(2) == 1.0);
325
326
  }
326
327
 
328
+ SECTION("bytes serialize deserialize one item") {
329
+ kll_float_sketch sketch(200, 0);
330
+ sketch.update(1.0f);
331
+ auto bytes = sketch.serialize();
332
+ REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
333
+ auto sketch2 = kll_float_sketch::deserialize(bytes.data(), bytes.size(), 0);
334
+ REQUIRE(bytes.size() == sketch2.get_serialized_size_bytes());
335
+ REQUIRE_FALSE(sketch2.is_empty());
336
+ REQUIRE_FALSE(sketch2.is_estimation_mode());
337
+ REQUIRE(sketch2.get_n() == 1);
338
+ REQUIRE(sketch2.get_num_retained() == 1);
339
+ REQUIRE(sketch2.get_min_value() == 1.0);
340
+ REQUIRE(sketch2.get_max_value() == 1.0);
341
+ REQUIRE(sketch2.get_quantile(0.5) == 1.0);
342
+ REQUIRE(sketch2.get_rank(1) == 0.0);
343
+ REQUIRE(sketch2.get_rank(2) == 1.0);
344
+ }
345
+
327
346
  SECTION("deserialize one item v1") {
328
347
  std::ifstream is;
329
348
  is.exceptions(std::ios::failbit | std::ios::badbit);
@@ -337,6 +356,42 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
337
356
  REQUIRE(sketch.get_max_value() == 1.0);
338
357
  }
339
358
 
359
+ SECTION("stream serialize deserialize three items") {
360
+ kll_float_sketch sketch(200, 0);
361
+ sketch.update(1.0f);
362
+ sketch.update(2.0f);
363
+ sketch.update(3.0f);
364
+ std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
365
+ sketch.serialize(s);
366
+ REQUIRE(static_cast<size_t>(s.tellp()) == sketch.get_serialized_size_bytes());
367
+ auto sketch2 = kll_float_sketch::deserialize(s, test_allocator<float>(0));
368
+ REQUIRE(static_cast<size_t>(s.tellp()) == sketch2.get_serialized_size_bytes());
369
+ REQUIRE(s.tellg() == s.tellp());
370
+ REQUIRE_FALSE(sketch2.is_empty());
371
+ REQUIRE_FALSE(sketch2.is_estimation_mode());
372
+ REQUIRE(sketch2.get_n() == 3);
373
+ REQUIRE(sketch2.get_num_retained() == 3);
374
+ REQUIRE(sketch2.get_min_value() == 1.0);
375
+ REQUIRE(sketch2.get_max_value() == 3.0);
376
+ }
377
+
378
+ SECTION("bytes serialize deserialize three items") {
379
+ kll_float_sketch sketch(200, 0);
380
+ sketch.update(1.0f);
381
+ sketch.update(2.0f);
382
+ sketch.update(3.0f);
383
+ auto bytes = sketch.serialize();
384
+ REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
385
+ auto sketch2 = kll_float_sketch::deserialize(bytes.data(), bytes.size(), 0);
386
+ REQUIRE(bytes.size() == sketch2.get_serialized_size_bytes());
387
+ REQUIRE_FALSE(sketch2.is_empty());
388
+ REQUIRE_FALSE(sketch2.is_estimation_mode());
389
+ REQUIRE(sketch2.get_n() == 3);
390
+ REQUIRE(sketch2.get_num_retained() == 3);
391
+ REQUIRE(sketch2.get_min_value() == 1.0);
392
+ REQUIRE(sketch2.get_max_value() == 3.0);
393
+ }
394
+
340
395
  SECTION("stream serialize deserialize many floats") {
341
396
  kll_float_sketch sketch(200, 0);
342
397
  const int n = 1000;
@@ -702,6 +757,15 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
702
757
  REQUIRE(kll_sketch<std::string>::get_max_serialized_size_bytes(200, 1000000000, 4) == 3160);
703
758
  }
704
759
 
760
+ SECTION("issue #236") {
761
+ kll_sketch<int8_t> kll;
762
+ kll.update(1);
763
+ kll.update(2);
764
+ kll.update(3);
765
+ auto blob = kll.serialize();
766
+ auto kll2 = kll_sketch<int8_t>::deserialize(blob.data(), blob.size());
767
+ }
768
+
705
769
  // cleanup
706
770
  if (test_allocator_total_bytes != 0) {
707
771
  REQUIRE(test_allocator_total_bytes == 0);
@@ -1,8 +1,10 @@
1
1
  [build-system]
2
2
  requires = ["wheel",
3
3
  "setuptools >= 30.3.0",
4
- "setuptools_scm",
5
- "cmake >= 3.12"]
4
+ "cmake >= 3.12",
5
+ "pip >= 10.0",
6
+ "pybind11[global] >= 2.6.0"]
7
+ build-backend = "setuptools.build_meta"
6
8
 
7
9
  [tool.tox]
8
10
  legacy_tox_ini = """
@@ -15,16 +15,20 @@
15
15
  # specific language governing permissions and limitations
16
16
  # under the License.
17
17
 
18
- # TODO: Can we force python version >= 3.0?
18
+ find_package(Python3 COMPONENTS Interpreter Development)
19
+
20
+ # only Windows+MSVC seems to have trouble locating pybind11
19
21
  if (MSVC)
20
- set(PYBIND11_CPP_STANDARD /std:c++11)
21
- else()
22
- set(PYBIND11_CPP_STANDARD -std=c++11)
22
+ execute_process(COMMAND cmd.exe /c ${CMAKE_CURRENT_SOURCE_DIR}/pybind11Path.cmd "${Python3_EXECUTABLE}"
23
+ WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
24
+ OUTPUT_STRIP_TRAILING_WHITESPACE
25
+ OUTPUT_VARIABLE EXTRA_PACKAGE_PATH)
26
+ set(CMAKE_PREFIX_PATH ${CMAKE_PREFIX_PATH} ${EXTRA_PACKAGE_PATH})
23
27
  endif()
24
28
 
25
- add_subdirectory(pybind11)
29
+ find_package(pybind11 CONFIG REQUIRED)
26
30
 
27
- pybind11_add_module(python MODULE EXCLUDE_FROM_ALL SYSTEM THIN_LTO)
31
+ pybind11_add_module(python MODULE EXCLUDE_FROM_ALL THIN_LTO)
28
32
 
29
33
  target_link_libraries(python
30
34
  PRIVATE
@@ -1,76 +1,57 @@
1
- # Python Wrapper for Apache DataSketches
1
+ <img src="https://raw.githubusercontent.com/apache/datasketches-website/master/logos/svg/datasketches-HorizontalColor-TM.svg" width="75%" alt="Apache DataSketches Logo">
2
2
 
3
- ## Installation
3
+ # The Apache DataSketches Library for Python
4
4
 
5
- The release files do not include the needed python binding library ([pybind11](https://github.com/pybind/pybind11)). If building
6
- from a relase package, you must ensure that the pybind11 directory points to a local copy of pybind11.
5
+ This is the official version of the [Apache DataSketches](https://datasketches.apache.org) Python library.
7
6
 
8
- An official pypi build is eventually planned but not yet available.
7
+ In the analysis of big data there are often problem queries that don’t scale because they require huge compute resources and time to generate exact results. Examples include count distinct, quantiles, most-frequent items, joins, matrix computations, and graph analysis.
9
8
 
10
- If you instead want to take a (possibly ill-advised) gamble on the current state of the master branch being useable, you can run:
11
- ```pip install git+https://github.com/apache/datasketches-cpp.git```
9
+ If approximate results are acceptable, there is a class of specialized algorithms, called streaming algorithms, or sketches, that can produce results orders of magnitude faster and with mathematically proven error bounds. For interactive queries there may not be other viable alternatives, and in the case of real-time analysis, sketches are the only known solution.
12
10
 
13
- ## Developer Instructions
14
-
15
- ### Building
16
-
17
- When cloning the source repository, you should include the pybind11 submodule with the `--recursive` option to the clone command:
18
- ```
19
- git clone --recursive https://github.com/apache/datasketches-cpp.git
20
- cd datasketches-cpp
21
- python -m pip install --upgrade pip setuptools wheel numpy
22
- python setup.py build
23
- ```
11
+ This package provides a variety of sketches as described below. Wherever a specific type of sketch exists in Apache DataSketches packages for other languages, the sketches will be portable between languages (for platforms with the same endianness).
24
12
 
25
- If you cloned without `--recursive`, you can add the submodule post-checkout using `git submodule update --init --recursive`.
13
+ ## Building and Installation
26
14
 
27
- ### Installing
15
+ Once cloned, the library can be installed by running `python -m pip install .` in the project root directory, which will also install the necessary dependencies, namely numpy and [pybind11[global]](https://github.com/pybind/pybind11).
28
16
 
29
- Assuming you have already checked out the library and any dependent submodules, install by simply replacing the lsat
30
- line of the build command with `python setup.py install`.
17
+ If you prefer to call the `setup.py` build script directly, you must first install `pybind11[global]`, as well as any other dependencies listed under the build-system section in `pyproject.toml`.
31
18
 
32
- ### Unit tests
33
-
34
- The python tests are run with `tox`. To ensure you have all the needed packages, from the package base directory run:
35
- ```
36
- python -m pip install --upgrade pip setuptools wheel numpy tox
37
- tox
38
- ```
19
+ The library is also available from PyPI via `python -m pip install datasketches`.
39
20
 
40
21
  ## Usage
41
22
 
42
- Having installed the library, loading the Apache Datasketches library in Python is simple: `import datasketches`.
23
+ Having installed the library, loading the Apache DataSketches Library in Python is simple: `import datasketches`.
43
24
 
44
25
  ## Available Sketch Classes
45
26
 
46
27
  - KLL (Absolute Error Quantiles)
47
- - `kll_ints_sketch`
48
- - `kll_floats_sketch`
28
+ - `kll_ints_sketch`
29
+ - `kll_floats_sketch`
49
30
  - REQ (Relative Error Quantiles)
50
- - `req_ints_sketch`
51
- - `req_floats_sketch`
31
+ - `req_ints_sketch`
32
+ - `req_floats_sketch`
52
33
  - Frequent Items
53
- - `frequent_strings_sketch`
54
- - Error types are `frequent_items_error_type.{NO_FALSE_NEGATIVES | NO_FALSE_POSITIVES}`
34
+ - `frequent_strings_sketch`
35
+ - Error types are `frequent_items_error_type.{NO_FALSE_NEGATIVES | NO_FALSE_POSITIVES}`
55
36
  - Theta
56
- - `update_theta_sketch`
57
- - `compact_theta_sketch` (cannot be instantiated directly)
58
- - `theta_union`
59
- - `theta_intersection`
60
- - `theta_a_not_b`
37
+ - `update_theta_sketch`
38
+ - `compact_theta_sketch` (cannot be instantiated directly)
39
+ - `theta_union`
40
+ - `theta_intersection`
41
+ - `theta_a_not_b`
61
42
  - HLL
62
- - `hll_sketch`
63
- - `hll_union`
64
- - Target HLL types are `tgt_hll_type.{HLL_4 | HLL_6 | HLL_8}`
43
+ - `hll_sketch`
44
+ - `hll_union`
45
+ - Target HLL types are `tgt_hll_type.{HLL_4 | HLL_6 | HLL_8}`
65
46
  - CPC
66
- - `cpc_sketch`
67
- - `cpc_union`
47
+ - `cpc_sketch`
48
+ - `cpc_union`
68
49
  - VarOpt Sampling
69
- - `var_opt_sketch`
70
- - `var_opt_union`
50
+ - `var_opt_sketch`
51
+ - `var_opt_union`
71
52
  - Vector of KLL
72
- - `vector_of_kll_ints_sketches`
73
- - `vector_of_kll_floats_sketches`
53
+ - `vector_of_kll_ints_sketches`
54
+ - `vector_of_kll_floats_sketches`
74
55
 
75
56
  ## Known Differences from C++
76
57
 
@@ -79,3 +60,22 @@ The Python API largely mirrors the C++ API, with a few minor exceptions: The pri
79
60
  The Vector of KLL object is currently exclusive to python, and holds an array of independent KLL sketches. This is useful for creating a set of KLL sketches over a vector and has been designed to allow input as either a vector or a matrix of multiple vectors.
80
61
 
81
62
  We have also removed reliance on a builder class for theta sketches as Python allows named arguments to the constructor, not strictly positional arguments.
63
+
64
+ ## Developer Instructions
65
+
66
+ The only developer-specific instructions relate to running unit tests.
67
+
68
+ ### Unit tests
69
+
70
+ The Python unit tests are run with `tox`. To ensure you have all the needed packages, from the package base directory run:
71
+
72
+ ```bash
73
+ python -m pip install --upgrade tox
74
+ tox
75
+ ```
76
+
77
+ ## License
78
+
79
+ The Apache DataSketches Library is distributed under an Apache 2.0 License.
80
+
81
+ Precompiled binaries may be provided as a convenience and distributed through PyPI via [https://pypi.org/project/datasketches/](https://pypi.org/project/datasketches/). These binaries contain compiled code from [pybind11](https://github.com/pybind/pybind11), which is distributed under a BSD license.
@@ -0,0 +1,3 @@
1
+ @echo off
2
+ :: Takes path to the Python interpreter and returns the path to pybind11
3
+ %1 -m pip show pybind11 | %1 -c "import sys,re;[sys.stdout.write(re.sub('^Location:\\s+','',line)) for line in sys.stdin if re.search('^Location:\\s+',line)]"
@@ -53,7 +53,7 @@ void init_cpc(py::module &m) {
53
53
  using namespace datasketches;
54
54
 
55
55
  py::class_<cpc_sketch>(m, "cpc_sketch")
56
- .def(py::init<uint8_t, uint64_t>(), py::arg("lg_k")=CPC_DEFAULT_LG_K, py::arg("seed")=DEFAULT_SEED)
56
+ .def(py::init<uint8_t, uint64_t>(), py::arg("lg_k")=cpc_constants::DEFAULT_LG_K, py::arg("seed")=DEFAULT_SEED)
57
57
  .def(py::init<const cpc_sketch&>())
58
58
  .def("__str__", &cpc_sketch::to_string,
59
59
  "Produces a string summary of the sketch")
@@ -116,7 +116,7 @@ void bind_kll_sketch(py::module &m, const char* name) {
116
116
  using namespace datasketches;
117
117
 
118
118
  py::class_<kll_sketch<T>>(m, name)
119
- .def(py::init<uint16_t>(), py::arg("k")=kll_sketch<T>::DEFAULT_K)
119
+ .def(py::init<uint16_t>(), py::arg("k")=kll_constants::DEFAULT_K)
120
120
  .def(py::init<const kll_sketch<T>&>())
121
121
  .def("update", (void (kll_sketch<T>::*)(const T&)) &kll_sketch<T>::update, py::arg("item"),
122
122
  "Updates the sketch with the given value")
@@ -103,7 +103,7 @@ void init_theta(py::module &m) {
103
103
 
104
104
  py::class_<update_theta_sketch, theta_sketch>(m, "update_theta_sketch")
105
105
  .def(py::init(&dspy::update_theta_sketch_factory),
106
- py::arg("lg_k")=update_theta_sketch::builder::DEFAULT_LG_K, py::arg("p")=1.0, py::arg("seed")=DEFAULT_SEED)
106
+ py::arg("lg_k")=theta_constants::DEFAULT_LG_K, py::arg("p")=1.0, py::arg("seed")=DEFAULT_SEED)
107
107
  .def(py::init<const update_theta_sketch&>())
108
108
  .def("update", (void (update_theta_sketch::*)(int64_t)) &update_theta_sketch::update, py::arg("datum"),
109
109
  "Updates the sketch with the given integral value")
@@ -127,7 +127,7 @@ void init_theta(py::module &m) {
127
127
 
128
128
  py::class_<theta_union>(m, "theta_union")
129
129
  .def(py::init(&dspy::theta_union_factory),
130
- py::arg("lg_k")=update_theta_sketch::builder::DEFAULT_LG_K, py::arg("p")=1.0, py::arg("seed")=DEFAULT_SEED)
130
+ py::arg("lg_k")=theta_constants::DEFAULT_LG_K, py::arg("p")=1.0, py::arg("seed")=DEFAULT_SEED)
131
131
  .def("update", &theta_union::update<const theta_sketch&>, py::arg("sketch"),
132
132
  "Updates the union with the given sketch")
133
133
  .def("get_result", &theta_union::get_result, py::arg("ordered")=true,
@@ -29,14 +29,20 @@ namespace py = pybind11;
29
29
 
30
30
  namespace datasketches {
31
31
 
32
+ namespace vector_of_kll_constants {
33
+ static const uint32_t DEFAULT_K = kll_constants::DEFAULT_K;
34
+ static const uint32_t DEFAULT_D = 1;
35
+ }
36
+
32
37
  // Wrapper class for Numpy compatibility
33
38
  template <typename T, typename C = std::less<T>, typename S = serde<T>>
34
39
  class vector_of_kll_sketches {
35
40
  public:
36
- static const uint32_t DEFAULT_K = kll_sketch<T, C, S>::DEFAULT_K;
37
- static const uint32_t DEFAULT_D = 1;
41
+ // TODO: Redundant and deprecated. Will be removed in next major version release.
42
+ static const uint32_t DEFAULT_K = vector_of_kll_constants::DEFAULT_K;
43
+ static const uint32_t DEFAULT_D = vector_of_kll_constants::DEFAULT_D;
38
44
 
39
- explicit vector_of_kll_sketches(uint32_t k = DEFAULT_K, uint32_t d = DEFAULT_D);
45
+ explicit vector_of_kll_sketches(uint32_t k = vector_of_kll_constants::DEFAULT_K, uint32_t d = vector_of_kll_constants::DEFAULT_D);
40
46
  vector_of_kll_sketches(const vector_of_kll_sketches& other);
41
47
  vector_of_kll_sketches(vector_of_kll_sketches&& other) noexcept;
42
48
  vector_of_kll_sketches<T,C,S>& operator=(const vector_of_kll_sketches& other);
@@ -432,8 +438,8 @@ void bind_vector_of_kll_sketches(py::module &m, const char* name) {
432
438
  using namespace datasketches;
433
439
 
434
440
  py::class_<vector_of_kll_sketches<T>>(m, name)
435
- .def(py::init<uint32_t, uint32_t>(), py::arg("k")=vector_of_kll_sketches<T>::DEFAULT_K,
436
- py::arg("d")=vector_of_kll_sketches<T>::DEFAULT_D)
441
+ .def(py::init<uint32_t, uint32_t>(), py::arg("k")=vector_of_kll_constants::DEFAULT_K,
442
+ py::arg("d")=vector_of_kll_constants::DEFAULT_D)
437
443
  .def(py::init<const vector_of_kll_sketches<T>&>())
438
444
  // allow user to retrieve k or d, in case it's instantiated w/ defaults
439
445
  .def("get_k", &vector_of_kll_sketches<T>::get_k,
@@ -30,10 +30,10 @@ class KllTest(unittest.TestCase):
30
30
  kll.update(0.0)
31
31
 
32
32
  # 0 should be near the median
33
- self.assertAlmostEqual(0.5, kll.get_rank(0.0), delta=0.025)
33
+ self.assertAlmostEqual(0.5, kll.get_rank(0.0), delta=0.035)
34
34
 
35
35
  # the median should be near 0
36
- self.assertAlmostEqual(0.0, kll.get_quantile(0.5), delta=0.025)
36
+ self.assertAlmostEqual(0.0, kll.get_quantile(0.5), delta=0.035)
37
37
 
38
38
  # we also track the min/max independently from the rest of the data
39
39
  # which lets us know the full observed data range
@@ -30,10 +30,10 @@ class reqTest(unittest.TestCase):
30
30
  req.update(0.0)
31
31
 
32
32
  # 0 should be near the median
33
- self.assertAlmostEqual(0.5, req.get_rank(0.0), delta=0.03)
33
+ self.assertAlmostEqual(0.5, req.get_rank(0.0), delta=0.045)
34
34
 
35
35
  # the median should be near 0
36
- self.assertAlmostEqual(0.0, req.get_quantile(0.5), delta=0.03)
36
+ self.assertAlmostEqual(0.0, req.get_quantile(0.5), delta=0.045)
37
37
 
38
38
  # we also track the min/max independently from the rest of the data
39
39
  # which lets us know the full observed data range
@@ -39,9 +39,9 @@ class VectorOfKllSketchesTest(unittest.TestCase):
39
39
  kll.update(dat)
40
40
 
41
41
  # 0 should be near the median
42
- np.testing.assert_allclose(0.5, kll.get_ranks(0.0), atol=0.025)
42
+ np.testing.assert_allclose(0.5, kll.get_ranks(0.0), atol=0.035)
43
43
  # the median should be near 0
44
- np.testing.assert_allclose(0.0, kll.get_quantiles(0.5), atol=0.025)
44
+ np.testing.assert_allclose(0.0, kll.get_quantiles(0.5), atol=0.035)
45
45
  # we also track the min/max independently from the rest of the data
46
46
  # which lets us know the full observed data range
47
47
  np.testing.assert_allclose(kll.get_min_values(), smin)
@@ -118,9 +118,9 @@ class VectorOfKllSketchesTest(unittest.TestCase):
118
118
  kll.update(dat)
119
119
 
120
120
  # 0 should be near the median
121
- np.testing.assert_allclose(0.5, kll.get_ranks(0.0), atol=0.025)
121
+ np.testing.assert_allclose(0.5, kll.get_ranks(0.0), atol=0.035)
122
122
  # the median should be near 0
123
- np.testing.assert_allclose(0.0, kll.get_quantiles(0.5), atol=0.025)
123
+ np.testing.assert_allclose(0.0, kll.get_quantiles(0.5), atol=0.035)
124
124
  # we also track the min/max independently from the rest of the data
125
125
  # which lets us know the full observed data range
126
126
  np.testing.assert_allclose(kll.get_min_values(), smin)
@@ -51,18 +51,23 @@ struct subset_summary {
51
51
  double total_sketch_weight;
52
52
  };
53
53
 
54
- enum resize_factor { X1 = 0, X2, X4, X8 };
55
-
56
54
  template <typename T, typename S, typename A> class var_opt_union; // forward declaration
57
55
 
56
+ namespace var_opt_constants {
57
+ const resize_factor DEFAULT_RESIZE_FACTOR = resize_factor::X8;
58
+ const uint32_t MAX_K = ((uint32_t) 1 << 31) - 2;
59
+ }
60
+
58
61
  template <typename T, typename S = serde<T>, typename A = std::allocator<T>>
59
62
  class var_opt_sketch {
60
63
 
61
64
  public:
62
- static const resize_factor DEFAULT_RESIZE_FACTOR = X8;
63
- static const uint32_t MAX_K = ((uint32_t) 1 << 31) - 2;
65
+ static const resize_factor DEFAULT_RESIZE_FACTOR = var_opt_constants::DEFAULT_RESIZE_FACTOR;
66
+ static const uint32_t MAX_K = var_opt_constants::MAX_K;
64
67
 
65
- explicit var_opt_sketch(uint32_t k, resize_factor rf = DEFAULT_RESIZE_FACTOR, const A& allocator = A());
68
+ explicit var_opt_sketch(uint32_t k,
69
+ resize_factor rf = var_opt_constants::DEFAULT_RESIZE_FACTOR,
70
+ const A& allocator = A());
66
71
  var_opt_sketch(const var_opt_sketch& other);
67
72
  var_opt_sketch(var_opt_sketch&& other) noexcept;
68
73
 
@@ -128,7 +128,7 @@ var_opt_sketch<T,S,A>::var_opt_sketch(T* data, double* weights, size_t len,
128
128
  r_(r_count),
129
129
  n_(n),
130
130
  total_wt_r_(total_wt_r),
131
- rf_(DEFAULT_RESIZE_FACTOR),
131
+ rf_(var_opt_constants::DEFAULT_RESIZE_FACTOR),
132
132
  curr_items_alloc_(len),
133
133
  filled_data_(n > k),
134
134
  allocator_(allocator),
@@ -49,8 +49,9 @@ class CMakeBuild(build_ext):
49
49
  os.path.dirname(self.get_ext_fullpath(ext.name)))
50
50
  cmake_args = ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=' + extdir]
51
51
  cmake_args += ['-DWITH_PYTHON=True']
52
+ cmake_args += ['-DCMAKE_CXX_STANDARD=11']
52
53
  # ensure we use a consistent python version
53
- cmake_args += ['-DPYTHON_EXECUTABLE=' + sys.executable]
54
+ cmake_args += ['-DPython3_EXECUTABLE=' + sys.executable]
54
55
  cfg = 'Debug' if self.debug else 'Release'
55
56
  build_args = ['--config', cfg]
56
57
 
@@ -59,7 +60,8 @@ class CMakeBuild(build_ext):
59
60
  cfg.upper(),
60
61
  extdir)]
61
62
  if sys.maxsize > 2**32:
62
- cmake_args += ['-A', 'x64']
63
+ cmake_args += ['-T', 'host=x64']
64
+ cmake_args += ['-DCMAKE_GENERATOR_PLATFORM=x64']
63
65
  build_args += ['--', '/m']
64
66
  else:
65
67
  cmake_args += ['-DCMAKE_BUILD_TYPE=' + cfg]
@@ -74,23 +76,24 @@ class CMakeBuild(build_ext):
74
76
  subprocess.check_call(['cmake', ext.sourcedir] + cmake_args,
75
77
  cwd=self.build_temp, env=env)
76
78
  subprocess.check_call(['cmake', '--build', '.', '--target', 'python'] + build_args,
77
- cwd=self.build_temp)
79
+ cwd=self.build_temp, env=env)
78
80
  print() # add an empty line to pretty print
79
81
 
80
82
  setup(
81
83
  name='datasketches',
82
- version='3.1.0',
83
- author='Apache DataSketches Developers',
84
+ version='3.2.0.1',
85
+ author='Apache Software Foundation',
84
86
  author_email='dev@datasketches.apache.org',
85
- description='A wrapper for the C++ Apache DataSketches library',
87
+ description='The Apache DataSketches Library for Python',
86
88
  license='Apache License 2.0',
87
89
  url='http://datasketches.apache.org',
88
90
  long_description=open('python/README.md').read(),
91
+ long_description_content_type='text/markdown',
89
92
  packages=find_packages('python'), # python pacakges only in this dir
90
93
  package_dir={'':'python'},
91
94
  # may need to add all source paths for sdist packages w/o MANIFEST.in
92
95
  ext_modules=[CMakeExtension('datasketches')],
93
96
  cmdclass={'build_ext': CMakeBuild},
94
- setup_requires=['setuptools_scm','tox-setuptools'],
97
+ install_requires=['numpy'],
95
98
  zip_safe=False
96
99
  )
@@ -21,14 +21,19 @@
21
21
  #define THETA_CONSTANTS_HPP_
22
22
 
23
23
  #include <climits>
24
+ #include "common_defs.hpp"
24
25
 
25
26
  namespace datasketches {
26
27
 
27
28
  namespace theta_constants {
28
- enum resize_factor { X1, X2, X4, X8 };
29
- static const uint64_t MAX_THETA = LLONG_MAX; // signed max for compatibility with Java
30
- static const uint8_t MIN_LG_K = 5;
31
- static const uint8_t MAX_LG_K = 26;
29
+ using resize_factor = datasketches::resize_factor;
30
+ //enum resize_factor { X1, X2, X4, X8 };
31
+ const uint64_t MAX_THETA = LLONG_MAX; // signed max for compatibility with Java
32
+ const uint8_t MIN_LG_K = 5;
33
+ const uint8_t MAX_LG_K = 26;
34
+
35
+ const uint8_t DEFAULT_LG_K = 12;
36
+ const resize_factor DEFAULT_RESIZE_FACTOR = resize_factor::X8;
32
37
  }
33
38
 
34
39
  } /* namespace datasketches */
@@ -94,11 +94,14 @@ struct theta_update_sketch_base {
94
94
  template<typename Derived, typename Allocator>
95
95
  class theta_base_builder {
96
96
  public:
97
+ // TODO: Redundant and deprecated. Will be removed in next major version release.
97
98
  using resize_factor = theta_constants::resize_factor;
98
99
  static const uint8_t MIN_LG_K = theta_constants::MIN_LG_K;
99
100
  static const uint8_t MAX_LG_K = theta_constants::MAX_LG_K;
100
- static const uint8_t DEFAULT_LG_K = 12;
101
- static const resize_factor DEFAULT_RESIZE_FACTOR = resize_factor::X8;
101
+ // TODO: The following defaults are redundant and deprecated. Will be removed in the
102
+ // next major version release
103
+ static const uint8_t DEFAULT_LG_K = theta_constants::DEFAULT_LG_K;
104
+ static const resize_factor DEFAULT_RESIZE_FACTOR = theta_constants::DEFAULT_RESIZE_FACTOR;
102
105
 
103
106
  /**
104
107
  * Creates an instance of the builder with default parameters.
@@ -271,7 +271,11 @@ void theta_update_sketch_base<EN, EK, A>::consolidate_non_empty(EN* entries, siz
271
271
 
272
272
  template<typename Derived, typename Allocator>
273
273
  theta_base_builder<Derived, Allocator>::theta_base_builder(const Allocator& allocator):
274
- allocator_(allocator), lg_k_(DEFAULT_LG_K), rf_(DEFAULT_RESIZE_FACTOR), p_(1), seed_(DEFAULT_SEED) {}
274
+ allocator_(allocator),
275
+ lg_k_(theta_constants::DEFAULT_LG_K),
276
+ rf_(theta_constants::DEFAULT_RESIZE_FACTOR),
277
+ p_(1),
278
+ seed_(DEFAULT_SEED) {}
275
279
 
276
280
  template<typename Derived, typename Allocator>
277
281
  Derived& theta_base_builder<Derived, Allocator>::set_lg_k(uint8_t lg_k) {
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datasketches
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-07-17 00:00:00.000000000 Z
11
+ date: 2021-09-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rice
@@ -177,6 +177,7 @@ files:
177
177
  - vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb
178
178
  - vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb
179
179
  - vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb
180
+ - vendor/datasketches-cpp/python/pybind11Path.cmd
180
181
  - vendor/datasketches-cpp/python/src/__init__.py
181
182
  - vendor/datasketches-cpp/python/src/cpc_wrapper.cpp
182
183
  - vendor/datasketches-cpp/python/src/datasketches.cpp