datasketches 0.2.7 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/datasketches/kll_wrapper.cpp +20 -20
  4. data/ext/datasketches/theta_wrapper.cpp +2 -2
  5. data/lib/datasketches/version.rb +1 -1
  6. data/vendor/datasketches-cpp/CMakeLists.txt +9 -1
  7. data/vendor/datasketches-cpp/MANIFEST.in +21 -2
  8. data/vendor/datasketches-cpp/common/CMakeLists.txt +5 -2
  9. data/vendor/datasketches-cpp/common/include/common_defs.hpp +10 -0
  10. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp +6 -6
  11. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +1 -0
  12. data/vendor/datasketches-cpp/common/include/{quantile_sketch_sorted_view.hpp → quantiles_sorted_view.hpp} +60 -25
  13. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +125 -0
  14. data/vendor/datasketches-cpp/common/include/version.hpp.in +36 -0
  15. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +25 -6
  16. data/vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp +459 -0
  17. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -1
  18. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +28 -44
  19. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +70 -78
  20. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +11 -4
  21. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +16 -9
  22. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +1 -1
  23. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +54 -41
  24. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
  25. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +2 -2
  26. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +1 -1
  27. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -32
  28. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +176 -233
  29. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +337 -395
  30. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -1
  31. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +26 -26
  32. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +196 -232
  33. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +41 -31
  34. data/vendor/datasketches-cpp/pyproject.toml +17 -12
  35. data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
  36. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +104 -0
  37. data/vendor/datasketches-cpp/python/datasketches/__init__.py +22 -0
  38. data/vendor/datasketches-cpp/python/include/py_serde.hpp +113 -0
  39. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +31 -24
  40. data/vendor/datasketches-cpp/python/pybind11Path.cmd +18 -0
  41. data/vendor/datasketches-cpp/python/src/__init__.py +17 -1
  42. data/vendor/datasketches-cpp/python/src/datasketches.cpp +9 -3
  43. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +18 -54
  44. data/vendor/datasketches-cpp/python/src/py_serde.cpp +111 -0
  45. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +17 -53
  46. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +17 -55
  47. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +62 -67
  48. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +47 -14
  49. data/vendor/datasketches-cpp/python/tests/__init__.py +16 -0
  50. data/vendor/datasketches-cpp/python/tests/req_test.py +1 -1
  51. data/vendor/datasketches-cpp/python/tests/vo_test.py +25 -1
  52. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +135 -180
  53. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +205 -210
  54. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +1 -1
  55. data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +19 -18
  56. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +240 -232
  57. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +15 -9
  58. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +35 -19
  59. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +126 -147
  60. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +265 -245
  61. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +1 -1
  62. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +26 -26
  63. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +116 -103
  64. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +22 -46
  65. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +180 -207
  66. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +18 -39
  67. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +75 -85
  68. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -1
  69. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +6 -6
  70. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +2 -2
  71. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +4 -4
  72. data/vendor/datasketches-cpp/setup.py +14 -2
  73. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +15 -25
  74. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +0 -9
  75. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -5
  76. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -1
  77. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +2 -1
  78. data/vendor/datasketches-cpp/tox.ini +26 -0
  79. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +36 -12
  80. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +16 -4
  81. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +2 -1
  82. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +299 -0
  83. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +26 -0
  84. data/vendor/datasketches-cpp/version.cfg.in +1 -0
  85. metadata +14 -5
  86. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +0 -91
@@ -17,7 +17,7 @@
17
17
 
18
18
  add_executable(kll_test)
19
19
 
20
- target_link_libraries(kll_test kll common_test)
20
+ target_link_libraries(kll_test kll common_test_lib)
21
21
 
22
22
  set_target_properties(kll_test PROPERTIES
23
23
  CXX_STANDARD 11
@@ -26,7 +26,7 @@
26
26
 
27
27
  namespace datasketches {
28
28
 
29
- using kll_test_type_sketch = kll_sketch<test_type, test_type_less, test_type_serde, test_allocator<test_type>>;
29
+ using kll_test_type_sketch = kll_sketch<test_type, test_type_less, test_allocator<test_type>>;
30
30
  using alloc = test_allocator<test_type>;
31
31
 
32
32
  TEST_CASE("kll sketch custom type", "[kll_sketch]") {
@@ -35,11 +35,11 @@ TEST_CASE("kll sketch custom type", "[kll_sketch]") {
35
35
  test_allocator_total_bytes = 0;
36
36
 
37
37
  SECTION("compact level zero") {
38
- kll_test_type_sketch sketch(8, 0);
38
+ kll_test_type_sketch sketch(8, test_type_less(), 0);
39
39
  REQUIRE_THROWS_AS(sketch.get_quantile(0), std::runtime_error);
40
- REQUIRE_THROWS_AS(sketch.get_min_value(), std::runtime_error);
41
- REQUIRE_THROWS_AS(sketch.get_max_value(), std::runtime_error);
42
- REQUIRE(sketch.get_serialized_size_bytes() == 8);
40
+ REQUIRE_THROWS_AS(sketch.get_min_item(), std::runtime_error);
41
+ REQUIRE_THROWS_AS(sketch.get_max_item(), std::runtime_error);
42
+ REQUIRE(sketch.get_serialized_size_bytes(test_type_serde()) == 8);
43
43
 
44
44
  sketch.update(1);
45
45
  sketch.update(2);
@@ -55,15 +55,15 @@ TEST_CASE("kll sketch custom type", "[kll_sketch]") {
55
55
 
56
56
  REQUIRE(sketch.is_estimation_mode());
57
57
  REQUIRE(sketch.get_n() > sketch.get_num_retained());
58
- REQUIRE(sketch.get_min_value().get_value() == 1);
59
- REQUIRE(sketch.get_max_value().get_value() == 9);
58
+ REQUIRE(sketch.get_min_item().get_value() == 1);
59
+ REQUIRE(sketch.get_max_item().get_value() == 9);
60
60
  }
61
61
 
62
62
  SECTION("merge small") {
63
- kll_test_type_sketch sketch1(8, 0);
63
+ kll_test_type_sketch sketch1(8, test_type_less(), 0);
64
64
  sketch1.update(1);
65
65
 
66
- kll_test_type_sketch sketch2(8, 0);
66
+ kll_test_type_sketch sketch2(8, test_type_less(), 0);
67
67
  sketch2.update(2);
68
68
 
69
69
  sketch2.merge(sketch1);
@@ -72,12 +72,12 @@ TEST_CASE("kll sketch custom type", "[kll_sketch]") {
72
72
 
73
73
  REQUIRE_FALSE(sketch2.is_estimation_mode());
74
74
  REQUIRE(sketch2.get_num_retained() == sketch2.get_n());
75
- REQUIRE(sketch2.get_min_value().get_value() == 1);
76
- REQUIRE(sketch2.get_max_value().get_value() == 2);
75
+ REQUIRE(sketch2.get_min_item().get_value() == 1);
76
+ REQUIRE(sketch2.get_max_item().get_value() == 2);
77
77
  }
78
78
 
79
79
  SECTION("merge higher levels") {
80
- kll_test_type_sketch sketch1(8, 0);
80
+ kll_test_type_sketch sketch1(8, test_type_less(), 0);
81
81
  sketch1.update(1);
82
82
  sketch1.update(2);
83
83
  sketch1.update(3);
@@ -88,7 +88,7 @@ TEST_CASE("kll sketch custom type", "[kll_sketch]") {
88
88
  sketch1.update(8);
89
89
  sketch1.update(9);
90
90
 
91
- kll_test_type_sketch sketch2(8, 0);
91
+ kll_test_type_sketch sketch2(8, test_type_less(), 0);
92
92
  sketch2.update(10);
93
93
  sketch2.update(11);
94
94
  sketch2.update(12);
@@ -105,28 +105,28 @@ TEST_CASE("kll sketch custom type", "[kll_sketch]") {
105
105
 
106
106
  REQUIRE(sketch2.is_estimation_mode());
107
107
  REQUIRE(sketch2.get_n() > sketch2.get_num_retained());
108
- REQUIRE(sketch2.get_min_value().get_value() == 1);
109
- REQUIRE(sketch2.get_max_value().get_value() == 18);
108
+ REQUIRE(sketch2.get_min_item().get_value() == 1);
109
+ REQUIRE(sketch2.get_max_item().get_value() == 18);
110
110
  }
111
111
 
112
112
  SECTION("serialize deserialize") {
113
- kll_test_type_sketch sketch1(200, 0);
113
+ kll_test_type_sketch sketch1(200, test_type_less(), 0);
114
114
 
115
115
  const int n = 1000;
116
116
  for (int i = 0; i < n; i++) sketch1.update(i);
117
117
 
118
118
  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
119
- sketch1.serialize(s);
120
- REQUIRE((size_t) s.tellp() == sketch1.get_serialized_size_bytes());
121
- auto sketch2 = kll_test_type_sketch::deserialize(s, alloc(0));
122
- REQUIRE((size_t) s.tellg() == sketch2.get_serialized_size_bytes());
119
+ sketch1.serialize(s, test_type_serde());
120
+ REQUIRE((size_t) s.tellp() == sketch1.get_serialized_size_bytes(test_type_serde()));
121
+ auto sketch2 = kll_test_type_sketch::deserialize(s, test_type_serde(), test_type_less(), 0);
122
+ REQUIRE((size_t) s.tellg() == sketch2.get_serialized_size_bytes(test_type_serde()));
123
123
  REQUIRE(s.tellg() == s.tellp());
124
124
  REQUIRE(sketch2.is_empty() == sketch1.is_empty());
125
125
  REQUIRE(sketch2.is_estimation_mode() == sketch1.is_estimation_mode());
126
126
  REQUIRE(sketch2.get_n() == sketch1.get_n());
127
127
  REQUIRE(sketch2.get_num_retained() == sketch1.get_num_retained());
128
- REQUIRE(sketch2.get_min_value().get_value() == sketch1.get_min_value().get_value());
129
- REQUIRE(sketch2.get_max_value().get_value() == sketch1.get_max_value().get_value());
128
+ REQUIRE(sketch2.get_min_item().get_value() == sketch1.get_min_item().get_value());
129
+ REQUIRE(sketch2.get_max_item().get_value() == sketch1.get_max_item().get_value());
130
130
  REQUIRE(sketch2.get_normalized_rank_error(false) == sketch1.get_normalized_rank_error(false));
131
131
  REQUIRE(sketch2.get_normalized_rank_error(true) == sketch1.get_normalized_rank_error(true));
132
132
  REQUIRE(sketch2.get_quantile(0.5).get_value() == sketch1.get_quantile(0.5).get_value());
@@ -136,13 +136,13 @@ TEST_CASE("kll sketch custom type", "[kll_sketch]") {
136
136
  }
137
137
 
138
138
  SECTION("moving merge") {
139
- kll_test_type_sketch sketch1(8, 0);
139
+ kll_test_type_sketch sketch1(8, test_type_less(), 0);
140
140
  for (int i = 0; i < 10; i++) sketch1.update(i);
141
- kll_test_type_sketch sketch2(8, 0);
141
+ kll_test_type_sketch sketch2(8, test_type_less(), 0);
142
142
  sketch2.update(10);
143
143
  sketch2.merge(std::move(sketch1));
144
- REQUIRE(sketch2.get_min_value().get_value() == 0);
145
- REQUIRE(sketch2.get_max_value().get_value() == 10);
144
+ REQUIRE(sketch2.get_min_item().get_value() == 0);
145
+ REQUIRE(sketch2.get_max_item().get_value() == 10);
146
146
  REQUIRE(sketch2.get_n() == 11);
147
147
  }
148
148