datasketches 0.2.7 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (86) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/datasketches/kll_wrapper.cpp +20 -20
  4. data/ext/datasketches/theta_wrapper.cpp +2 -2
  5. data/lib/datasketches/version.rb +1 -1
  6. data/vendor/datasketches-cpp/CMakeLists.txt +9 -1
  7. data/vendor/datasketches-cpp/MANIFEST.in +21 -2
  8. data/vendor/datasketches-cpp/common/CMakeLists.txt +5 -2
  9. data/vendor/datasketches-cpp/common/include/common_defs.hpp +10 -0
  10. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp +6 -6
  11. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +1 -0
  12. data/vendor/datasketches-cpp/common/include/{quantile_sketch_sorted_view.hpp → quantiles_sorted_view.hpp} +60 -25
  13. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +125 -0
  14. data/vendor/datasketches-cpp/common/include/version.hpp.in +36 -0
  15. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +25 -6
  16. data/vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp +459 -0
  17. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -1
  18. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +28 -44
  19. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +70 -78
  20. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +11 -4
  21. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +16 -9
  22. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +1 -1
  23. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +54 -41
  24. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
  25. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +2 -2
  26. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +1 -1
  27. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -32
  28. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +176 -233
  29. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +337 -395
  30. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -1
  31. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +26 -26
  32. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +196 -232
  33. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +41 -31
  34. data/vendor/datasketches-cpp/pyproject.toml +17 -12
  35. data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
  36. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +104 -0
  37. data/vendor/datasketches-cpp/python/datasketches/__init__.py +22 -0
  38. data/vendor/datasketches-cpp/python/include/py_serde.hpp +113 -0
  39. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +31 -24
  40. data/vendor/datasketches-cpp/python/pybind11Path.cmd +18 -0
  41. data/vendor/datasketches-cpp/python/src/__init__.py +17 -1
  42. data/vendor/datasketches-cpp/python/src/datasketches.cpp +9 -3
  43. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +18 -54
  44. data/vendor/datasketches-cpp/python/src/py_serde.cpp +111 -0
  45. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +17 -53
  46. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +17 -55
  47. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +62 -67
  48. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +47 -14
  49. data/vendor/datasketches-cpp/python/tests/__init__.py +16 -0
  50. data/vendor/datasketches-cpp/python/tests/req_test.py +1 -1
  51. data/vendor/datasketches-cpp/python/tests/vo_test.py +25 -1
  52. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +135 -180
  53. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +205 -210
  54. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +1 -1
  55. data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +19 -18
  56. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +240 -232
  57. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +15 -9
  58. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +35 -19
  59. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +126 -147
  60. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +265 -245
  61. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +1 -1
  62. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +26 -26
  63. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +116 -103
  64. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +22 -46
  65. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +180 -207
  66. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +18 -39
  67. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +75 -85
  68. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -1
  69. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +6 -6
  70. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +2 -2
  71. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +4 -4
  72. data/vendor/datasketches-cpp/setup.py +14 -2
  73. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +15 -25
  74. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +0 -9
  75. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -5
  76. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -1
  77. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +2 -1
  78. data/vendor/datasketches-cpp/tox.ini +26 -0
  79. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +36 -12
  80. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +16 -4
  81. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +2 -1
  82. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +299 -0
  83. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +26 -0
  84. data/vendor/datasketches-cpp/version.cfg.in +1 -0
  85. metadata +14 -5
  86. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +0 -91
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datasketches
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.7
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-11-05 00:00:00.000000000 Z
11
+ date: 2022-12-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rice
@@ -65,12 +65,14 @@ files:
65
65
  - vendor/datasketches-cpp/common/include/kolmogorov_smirnov.hpp
66
66
  - vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp
67
67
  - vendor/datasketches-cpp/common/include/memory_operations.hpp
68
- - vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view.hpp
69
- - vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp
68
+ - vendor/datasketches-cpp/common/include/quantiles_sorted_view.hpp
69
+ - vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp
70
70
  - vendor/datasketches-cpp/common/include/serde.hpp
71
+ - vendor/datasketches-cpp/common/include/version.hpp.in
71
72
  - vendor/datasketches-cpp/common/test/CMakeLists.txt
72
73
  - vendor/datasketches-cpp/common/test/catch_runner.cpp
73
74
  - vendor/datasketches-cpp/common/test/integration_test.cpp
75
+ - vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp
74
76
  - vendor/datasketches-cpp/common/test/test_allocator.cpp
75
77
  - vendor/datasketches-cpp/common/test/test_allocator.hpp
76
78
  - vendor/datasketches-cpp/common/test/test_type.hpp
@@ -171,6 +173,9 @@ files:
171
173
  - vendor/datasketches-cpp/pyproject.toml
172
174
  - vendor/datasketches-cpp/python/CMakeLists.txt
173
175
  - vendor/datasketches-cpp/python/README.md
176
+ - vendor/datasketches-cpp/python/datasketches/PySerDe.py
177
+ - vendor/datasketches-cpp/python/datasketches/__init__.py
178
+ - vendor/datasketches-cpp/python/include/py_serde.hpp
174
179
  - vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb
175
180
  - vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb
176
181
  - vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb
@@ -184,6 +189,7 @@ files:
184
189
  - vendor/datasketches-cpp/python/src/hll_wrapper.cpp
185
190
  - vendor/datasketches-cpp/python/src/kll_wrapper.cpp
186
191
  - vendor/datasketches-cpp/python/src/ks_wrapper.cpp
192
+ - vendor/datasketches-cpp/python/src/py_serde.cpp
187
193
  - vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp
188
194
  - vendor/datasketches-cpp/python/src/req_wrapper.cpp
189
195
  - vendor/datasketches-cpp/python/src/theta_wrapper.cpp
@@ -283,6 +289,7 @@ files:
283
289
  - vendor/datasketches-cpp/theta/test/theta_setop_test.cpp
284
290
  - vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp
285
291
  - vendor/datasketches-cpp/theta/test/theta_union_test.cpp
292
+ - vendor/datasketches-cpp/tox.ini
286
293
  - vendor/datasketches-cpp/tuple/CMakeLists.txt
287
294
  - vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp
288
295
  - vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp
@@ -308,12 +315,14 @@ files:
308
315
  - vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk
309
316
  - vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk
310
317
  - vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp
318
+ - vendor/datasketches-cpp/tuple/test/engagement_test.cpp
311
319
  - vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp
312
320
  - vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp
313
321
  - vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp
314
322
  - vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp
315
323
  - vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp
316
324
  - vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp
325
+ - vendor/datasketches-cpp/version.cfg.in
317
326
  homepage: https://github.com/ankane/datasketches-ruby
318
327
  licenses:
319
328
  - Apache-2.0
@@ -326,7 +335,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
326
335
  requirements:
327
336
  - - ">="
328
337
  - !ruby/object:Gem::Version
329
- version: '2.6'
338
+ version: '2.7'
330
339
  required_rubygems_version: !ruby/object:Gem::Requirement
331
340
  requirements:
332
341
  - - ">="
@@ -1,91 +0,0 @@
1
- /*
2
- * Licensed to the Apache Software Foundation (ASF) under one
3
- * or more contributor license agreements. See the NOTICE file
4
- * distributed with this work for additional information
5
- * regarding copyright ownership. The ASF licenses this file
6
- * to you under the Apache License, Version 2.0 (the
7
- * "License"); you may not use this file except in compliance
8
- * with the License. You may obtain a copy of the License at
9
- *
10
- * http://www.apache.org/licenses/LICENSE-2.0
11
- *
12
- * Unless required by applicable law or agreed to in writing,
13
- * software distributed under the License is distributed on an
14
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
- * KIND, either express or implied. See the License for the
16
- * specific language governing permissions and limitations
17
- * under the License.
18
- */
19
-
20
- #ifndef QUANTILE_SKETCH_SORTED_VIEW_IMPL_HPP_
21
- #define QUANTILE_SKETCH_SORTED_VIEW_IMPL_HPP_
22
-
23
- #include <algorithm>
24
- #include <stdexcept>
25
-
26
- namespace datasketches {
27
-
28
- template<typename T, typename C, typename A>
29
- quantile_sketch_sorted_view<T, C, A>::quantile_sketch_sorted_view(uint32_t num, const A& allocator):
30
- total_weight_(0),
31
- entries_(allocator)
32
- {
33
- entries_.reserve(num);
34
- }
35
-
36
- template<typename T, typename C, typename A>
37
- template<typename Iterator>
38
- void quantile_sketch_sorted_view<T, C, A>::add(Iterator first, Iterator last, uint64_t weight) {
39
- const size_t size_before = entries_.size();
40
- for (auto it = first; it != last; ++it) entries_.push_back(Entry(ref_helper(*it), weight));
41
- if (size_before > 0) {
42
- Container tmp(entries_.get_allocator());
43
- tmp.reserve(entries_.capacity());
44
- std::merge(
45
- entries_.begin(), entries_.begin() + size_before,
46
- entries_.begin() + size_before, entries_.end(),
47
- std::back_inserter(tmp), compare_pairs_by_first()
48
- );
49
- std::swap(tmp, entries_);
50
- }
51
- }
52
-
53
- template<typename T, typename C, typename A>
54
- template<bool inclusive>
55
- void quantile_sketch_sorted_view<T, C, A>::convert_to_cummulative() {
56
- uint64_t subtotal = 0;
57
- for (auto& entry: entries_) {
58
- const uint64_t new_subtotal = subtotal + entry.second;
59
- entry.second = inclusive ? new_subtotal : subtotal;
60
- subtotal = new_subtotal;
61
- }
62
- total_weight_ = subtotal;
63
- }
64
-
65
- template<typename T, typename C, typename A>
66
- auto quantile_sketch_sorted_view<T, C, A>::get_quantile(double rank) const -> quantile_return_type {
67
- if (total_weight_ == 0) throw std::invalid_argument("supported for cumulative weight only");
68
- uint64_t weight = static_cast<uint64_t>(rank * total_weight_);
69
- auto it = std::lower_bound(entries_.begin(), entries_.end(), make_dummy_entry<T>(weight), compare_pairs_by_second());
70
- if (it == entries_.end()) return deref_helper(entries_[entries_.size() - 1].first);
71
- return deref_helper(it->first);
72
- }
73
-
74
- template<typename T, typename C, typename A>
75
- auto quantile_sketch_sorted_view<T, C, A>::begin() const -> const_iterator {
76
- return entries_.begin();
77
- }
78
-
79
- template<typename T, typename C, typename A>
80
- auto quantile_sketch_sorted_view<T, C, A>::end() const -> const_iterator {
81
- return entries_.end();
82
- }
83
-
84
- template<typename T, typename C, typename A>
85
- size_t quantile_sketch_sorted_view<T, C, A>::size() const {
86
- return entries_.size();
87
- }
88
-
89
- } /* namespace datasketches */
90
-
91
- #endif