datasketches 0.2.7 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/datasketches/kll_wrapper.cpp +20 -20
  4. data/ext/datasketches/theta_wrapper.cpp +2 -2
  5. data/lib/datasketches/version.rb +1 -1
  6. data/vendor/datasketches-cpp/CMakeLists.txt +9 -1
  7. data/vendor/datasketches-cpp/MANIFEST.in +21 -2
  8. data/vendor/datasketches-cpp/common/CMakeLists.txt +5 -2
  9. data/vendor/datasketches-cpp/common/include/common_defs.hpp +10 -0
  10. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp +6 -6
  11. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +1 -0
  12. data/vendor/datasketches-cpp/common/include/{quantile_sketch_sorted_view.hpp → quantiles_sorted_view.hpp} +60 -25
  13. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +125 -0
  14. data/vendor/datasketches-cpp/common/include/version.hpp.in +36 -0
  15. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +25 -6
  16. data/vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp +459 -0
  17. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -1
  18. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +28 -44
  19. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +70 -78
  20. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +11 -4
  21. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +16 -9
  22. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +1 -1
  23. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +54 -41
  24. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
  25. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +2 -2
  26. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +1 -1
  27. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -32
  28. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +176 -233
  29. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +337 -395
  30. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -1
  31. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +26 -26
  32. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +196 -232
  33. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +41 -31
  34. data/vendor/datasketches-cpp/pyproject.toml +17 -12
  35. data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
  36. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +104 -0
  37. data/vendor/datasketches-cpp/python/datasketches/__init__.py +22 -0
  38. data/vendor/datasketches-cpp/python/include/py_serde.hpp +113 -0
  39. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +31 -24
  40. data/vendor/datasketches-cpp/python/pybind11Path.cmd +18 -0
  41. data/vendor/datasketches-cpp/python/src/__init__.py +17 -1
  42. data/vendor/datasketches-cpp/python/src/datasketches.cpp +9 -3
  43. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +18 -54
  44. data/vendor/datasketches-cpp/python/src/py_serde.cpp +111 -0
  45. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +17 -53
  46. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +17 -55
  47. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +62 -67
  48. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +47 -14
  49. data/vendor/datasketches-cpp/python/tests/__init__.py +16 -0
  50. data/vendor/datasketches-cpp/python/tests/req_test.py +1 -1
  51. data/vendor/datasketches-cpp/python/tests/vo_test.py +25 -1
  52. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +135 -180
  53. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +205 -210
  54. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +1 -1
  55. data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +19 -18
  56. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +240 -232
  57. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +15 -9
  58. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +35 -19
  59. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +126 -147
  60. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +265 -245
  61. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +1 -1
  62. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +26 -26
  63. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +116 -103
  64. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +22 -46
  65. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +180 -207
  66. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +18 -39
  67. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +75 -85
  68. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -1
  69. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +6 -6
  70. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +2 -2
  71. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +4 -4
  72. data/vendor/datasketches-cpp/setup.py +14 -2
  73. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +15 -25
  74. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +0 -9
  75. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -5
  76. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -1
  77. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +2 -1
  78. data/vendor/datasketches-cpp/tox.ini +26 -0
  79. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +36 -12
  80. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +16 -4
  81. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +2 -1
  82. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +299 -0
  83. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +26 -0
  84. data/vendor/datasketches-cpp/version.cfg.in +1 -0
  85. metadata +14 -5
  86. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +0 -91
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datasketches
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.7
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-11-05 00:00:00.000000000 Z
11
+ date: 2022-12-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rice
@@ -65,12 +65,14 @@ files:
65
65
  - vendor/datasketches-cpp/common/include/kolmogorov_smirnov.hpp
66
66
  - vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp
67
67
  - vendor/datasketches-cpp/common/include/memory_operations.hpp
68
- - vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view.hpp
69
- - vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp
68
+ - vendor/datasketches-cpp/common/include/quantiles_sorted_view.hpp
69
+ - vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp
70
70
  - vendor/datasketches-cpp/common/include/serde.hpp
71
+ - vendor/datasketches-cpp/common/include/version.hpp.in
71
72
  - vendor/datasketches-cpp/common/test/CMakeLists.txt
72
73
  - vendor/datasketches-cpp/common/test/catch_runner.cpp
73
74
  - vendor/datasketches-cpp/common/test/integration_test.cpp
75
+ - vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp
74
76
  - vendor/datasketches-cpp/common/test/test_allocator.cpp
75
77
  - vendor/datasketches-cpp/common/test/test_allocator.hpp
76
78
  - vendor/datasketches-cpp/common/test/test_type.hpp
@@ -171,6 +173,9 @@ files:
171
173
  - vendor/datasketches-cpp/pyproject.toml
172
174
  - vendor/datasketches-cpp/python/CMakeLists.txt
173
175
  - vendor/datasketches-cpp/python/README.md
176
+ - vendor/datasketches-cpp/python/datasketches/PySerDe.py
177
+ - vendor/datasketches-cpp/python/datasketches/__init__.py
178
+ - vendor/datasketches-cpp/python/include/py_serde.hpp
174
179
  - vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb
175
180
  - vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb
176
181
  - vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb
@@ -184,6 +189,7 @@ files:
184
189
  - vendor/datasketches-cpp/python/src/hll_wrapper.cpp
185
190
  - vendor/datasketches-cpp/python/src/kll_wrapper.cpp
186
191
  - vendor/datasketches-cpp/python/src/ks_wrapper.cpp
192
+ - vendor/datasketches-cpp/python/src/py_serde.cpp
187
193
  - vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp
188
194
  - vendor/datasketches-cpp/python/src/req_wrapper.cpp
189
195
  - vendor/datasketches-cpp/python/src/theta_wrapper.cpp
@@ -283,6 +289,7 @@ files:
283
289
  - vendor/datasketches-cpp/theta/test/theta_setop_test.cpp
284
290
  - vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp
285
291
  - vendor/datasketches-cpp/theta/test/theta_union_test.cpp
292
+ - vendor/datasketches-cpp/tox.ini
286
293
  - vendor/datasketches-cpp/tuple/CMakeLists.txt
287
294
  - vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp
288
295
  - vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp
@@ -308,12 +315,14 @@ files:
308
315
  - vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk
309
316
  - vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk
310
317
  - vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp
318
+ - vendor/datasketches-cpp/tuple/test/engagement_test.cpp
311
319
  - vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp
312
320
  - vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp
313
321
  - vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp
314
322
  - vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp
315
323
  - vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp
316
324
  - vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp
325
+ - vendor/datasketches-cpp/version.cfg.in
317
326
  homepage: https://github.com/ankane/datasketches-ruby
318
327
  licenses:
319
328
  - Apache-2.0
@@ -326,7 +335,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
326
335
  requirements:
327
336
  - - ">="
328
337
  - !ruby/object:Gem::Version
329
- version: '2.6'
338
+ version: '2.7'
330
339
  required_rubygems_version: !ruby/object:Gem::Requirement
331
340
  requirements:
332
341
  - - ">="
@@ -1,91 +0,0 @@
1
- /*
2
- * Licensed to the Apache Software Foundation (ASF) under one
3
- * or more contributor license agreements. See the NOTICE file
4
- * distributed with this work for additional information
5
- * regarding copyright ownership. The ASF licenses this file
6
- * to you under the Apache License, Version 2.0 (the
7
- * "License"); you may not use this file except in compliance
8
- * with the License. You may obtain a copy of the License at
9
- *
10
- * http://www.apache.org/licenses/LICENSE-2.0
11
- *
12
- * Unless required by applicable law or agreed to in writing,
13
- * software distributed under the License is distributed on an
14
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
- * KIND, either express or implied. See the License for the
16
- * specific language governing permissions and limitations
17
- * under the License.
18
- */
19
-
20
- #ifndef QUANTILE_SKETCH_SORTED_VIEW_IMPL_HPP_
21
- #define QUANTILE_SKETCH_SORTED_VIEW_IMPL_HPP_
22
-
23
- #include <algorithm>
24
- #include <stdexcept>
25
-
26
- namespace datasketches {
27
-
28
- template<typename T, typename C, typename A>
29
- quantile_sketch_sorted_view<T, C, A>::quantile_sketch_sorted_view(uint32_t num, const A& allocator):
30
- total_weight_(0),
31
- entries_(allocator)
32
- {
33
- entries_.reserve(num);
34
- }
35
-
36
- template<typename T, typename C, typename A>
37
- template<typename Iterator>
38
- void quantile_sketch_sorted_view<T, C, A>::add(Iterator first, Iterator last, uint64_t weight) {
39
- const size_t size_before = entries_.size();
40
- for (auto it = first; it != last; ++it) entries_.push_back(Entry(ref_helper(*it), weight));
41
- if (size_before > 0) {
42
- Container tmp(entries_.get_allocator());
43
- tmp.reserve(entries_.capacity());
44
- std::merge(
45
- entries_.begin(), entries_.begin() + size_before,
46
- entries_.begin() + size_before, entries_.end(),
47
- std::back_inserter(tmp), compare_pairs_by_first()
48
- );
49
- std::swap(tmp, entries_);
50
- }
51
- }
52
-
53
- template<typename T, typename C, typename A>
54
- template<bool inclusive>
55
- void quantile_sketch_sorted_view<T, C, A>::convert_to_cummulative() {
56
- uint64_t subtotal = 0;
57
- for (auto& entry: entries_) {
58
- const uint64_t new_subtotal = subtotal + entry.second;
59
- entry.second = inclusive ? new_subtotal : subtotal;
60
- subtotal = new_subtotal;
61
- }
62
- total_weight_ = subtotal;
63
- }
64
-
65
- template<typename T, typename C, typename A>
66
- auto quantile_sketch_sorted_view<T, C, A>::get_quantile(double rank) const -> quantile_return_type {
67
- if (total_weight_ == 0) throw std::invalid_argument("supported for cumulative weight only");
68
- uint64_t weight = static_cast<uint64_t>(rank * total_weight_);
69
- auto it = std::lower_bound(entries_.begin(), entries_.end(), make_dummy_entry<T>(weight), compare_pairs_by_second());
70
- if (it == entries_.end()) return deref_helper(entries_[entries_.size() - 1].first);
71
- return deref_helper(it->first);
72
- }
73
-
74
- template<typename T, typename C, typename A>
75
- auto quantile_sketch_sorted_view<T, C, A>::begin() const -> const_iterator {
76
- return entries_.begin();
77
- }
78
-
79
- template<typename T, typename C, typename A>
80
- auto quantile_sketch_sorted_view<T, C, A>::end() const -> const_iterator {
81
- return entries_.end();
82
- }
83
-
84
- template<typename T, typename C, typename A>
85
- size_t quantile_sketch_sorted_view<T, C, A>::size() const {
86
- return entries_.size();
87
- }
88
-
89
- } /* namespace datasketches */
90
-
91
- #endif