datasketches 0.2.7 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (86)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/datasketches/kll_wrapper.cpp +20 -20
  4. data/ext/datasketches/theta_wrapper.cpp +2 -2
  5. data/lib/datasketches/version.rb +1 -1
  6. data/vendor/datasketches-cpp/CMakeLists.txt +9 -1
  7. data/vendor/datasketches-cpp/MANIFEST.in +21 -2
  8. data/vendor/datasketches-cpp/common/CMakeLists.txt +5 -2
  9. data/vendor/datasketches-cpp/common/include/common_defs.hpp +10 -0
  10. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp +6 -6
  11. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +1 -0
  12. data/vendor/datasketches-cpp/common/include/{quantile_sketch_sorted_view.hpp → quantiles_sorted_view.hpp} +60 -25
  13. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +125 -0
  14. data/vendor/datasketches-cpp/common/include/version.hpp.in +36 -0
  15. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +25 -6
  16. data/vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp +459 -0
  17. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -1
  18. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +28 -44
  19. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +70 -78
  20. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +11 -4
  21. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +16 -9
  22. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +1 -1
  23. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +54 -41
  24. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
  25. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +2 -2
  26. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +1 -1
  27. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -32
  28. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +176 -233
  29. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +337 -395
  30. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -1
  31. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +26 -26
  32. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +196 -232
  33. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +41 -31
  34. data/vendor/datasketches-cpp/pyproject.toml +17 -12
  35. data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
  36. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +104 -0
  37. data/vendor/datasketches-cpp/python/datasketches/__init__.py +22 -0
  38. data/vendor/datasketches-cpp/python/include/py_serde.hpp +113 -0
  39. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +31 -24
  40. data/vendor/datasketches-cpp/python/pybind11Path.cmd +18 -0
  41. data/vendor/datasketches-cpp/python/src/__init__.py +17 -1
  42. data/vendor/datasketches-cpp/python/src/datasketches.cpp +9 -3
  43. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +18 -54
  44. data/vendor/datasketches-cpp/python/src/py_serde.cpp +111 -0
  45. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +17 -53
  46. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +17 -55
  47. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +62 -67
  48. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +47 -14
  49. data/vendor/datasketches-cpp/python/tests/__init__.py +16 -0
  50. data/vendor/datasketches-cpp/python/tests/req_test.py +1 -1
  51. data/vendor/datasketches-cpp/python/tests/vo_test.py +25 -1
  52. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +135 -180
  53. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +205 -210
  54. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +1 -1
  55. data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +19 -18
  56. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +240 -232
  57. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +15 -9
  58. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +35 -19
  59. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +126 -147
  60. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +265 -245
  61. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +1 -1
  62. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +26 -26
  63. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +116 -103
  64. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +22 -46
  65. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +180 -207
  66. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +18 -39
  67. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +75 -85
  68. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -1
  69. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +6 -6
  70. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +2 -2
  71. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +4 -4
  72. data/vendor/datasketches-cpp/setup.py +14 -2
  73. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +15 -25
  74. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +0 -9
  75. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -5
  76. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -1
  77. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +2 -1
  78. data/vendor/datasketches-cpp/tox.ini +26 -0
  79. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +36 -12
  80. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +16 -4
  81. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +2 -1
  82. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +299 -0
  83. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +26 -0
  84. data/vendor/datasketches-cpp/version.cfg.in +1 -0
  85. metadata +14 -5
  86. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +0 -91
@@ -17,7 +17,7 @@
17
17
 
18
18
  add_executable(kll_test)
19
19
 
20
- target_link_libraries(kll_test kll common_test)
20
+ target_link_libraries(kll_test kll common_test_lib)
21
21
 
22
22
  set_target_properties(kll_test PROPERTIES
23
23
  CXX_STANDARD 11
@@ -26,7 +26,7 @@
26
26
 
27
27
  namespace datasketches {
28
28
 
29
- using kll_test_type_sketch = kll_sketch<test_type, test_type_less, test_type_serde, test_allocator<test_type>>;
29
+ using kll_test_type_sketch = kll_sketch<test_type, test_type_less, test_allocator<test_type>>;
30
30
  using alloc = test_allocator<test_type>;
31
31
 
32
32
  TEST_CASE("kll sketch custom type", "[kll_sketch]") {
@@ -35,11 +35,11 @@ TEST_CASE("kll sketch custom type", "[kll_sketch]") {
35
35
  test_allocator_total_bytes = 0;
36
36
 
37
37
  SECTION("compact level zero") {
38
- kll_test_type_sketch sketch(8, 0);
38
+ kll_test_type_sketch sketch(8, test_type_less(), 0);
39
39
  REQUIRE_THROWS_AS(sketch.get_quantile(0), std::runtime_error);
40
- REQUIRE_THROWS_AS(sketch.get_min_value(), std::runtime_error);
41
- REQUIRE_THROWS_AS(sketch.get_max_value(), std::runtime_error);
42
- REQUIRE(sketch.get_serialized_size_bytes() == 8);
40
+ REQUIRE_THROWS_AS(sketch.get_min_item(), std::runtime_error);
41
+ REQUIRE_THROWS_AS(sketch.get_max_item(), std::runtime_error);
42
+ REQUIRE(sketch.get_serialized_size_bytes(test_type_serde()) == 8);
43
43
 
44
44
  sketch.update(1);
45
45
  sketch.update(2);
@@ -55,15 +55,15 @@ TEST_CASE("kll sketch custom type", "[kll_sketch]") {
55
55
 
56
56
  REQUIRE(sketch.is_estimation_mode());
57
57
  REQUIRE(sketch.get_n() > sketch.get_num_retained());
58
- REQUIRE(sketch.get_min_value().get_value() == 1);
59
- REQUIRE(sketch.get_max_value().get_value() == 9);
58
+ REQUIRE(sketch.get_min_item().get_value() == 1);
59
+ REQUIRE(sketch.get_max_item().get_value() == 9);
60
60
  }
61
61
 
62
62
  SECTION("merge small") {
63
- kll_test_type_sketch sketch1(8, 0);
63
+ kll_test_type_sketch sketch1(8, test_type_less(), 0);
64
64
  sketch1.update(1);
65
65
 
66
- kll_test_type_sketch sketch2(8, 0);
66
+ kll_test_type_sketch sketch2(8, test_type_less(), 0);
67
67
  sketch2.update(2);
68
68
 
69
69
  sketch2.merge(sketch1);
@@ -72,12 +72,12 @@ TEST_CASE("kll sketch custom type", "[kll_sketch]") {
72
72
 
73
73
  REQUIRE_FALSE(sketch2.is_estimation_mode());
74
74
  REQUIRE(sketch2.get_num_retained() == sketch2.get_n());
75
- REQUIRE(sketch2.get_min_value().get_value() == 1);
76
- REQUIRE(sketch2.get_max_value().get_value() == 2);
75
+ REQUIRE(sketch2.get_min_item().get_value() == 1);
76
+ REQUIRE(sketch2.get_max_item().get_value() == 2);
77
77
  }
78
78
 
79
79
  SECTION("merge higher levels") {
80
- kll_test_type_sketch sketch1(8, 0);
80
+ kll_test_type_sketch sketch1(8, test_type_less(), 0);
81
81
  sketch1.update(1);
82
82
  sketch1.update(2);
83
83
  sketch1.update(3);
@@ -88,7 +88,7 @@ TEST_CASE("kll sketch custom type", "[kll_sketch]") {
88
88
  sketch1.update(8);
89
89
  sketch1.update(9);
90
90
 
91
- kll_test_type_sketch sketch2(8, 0);
91
+ kll_test_type_sketch sketch2(8, test_type_less(), 0);
92
92
  sketch2.update(10);
93
93
  sketch2.update(11);
94
94
  sketch2.update(12);
@@ -105,28 +105,28 @@ TEST_CASE("kll sketch custom type", "[kll_sketch]") {
105
105
 
106
106
  REQUIRE(sketch2.is_estimation_mode());
107
107
  REQUIRE(sketch2.get_n() > sketch2.get_num_retained());
108
- REQUIRE(sketch2.get_min_value().get_value() == 1);
109
- REQUIRE(sketch2.get_max_value().get_value() == 18);
108
+ REQUIRE(sketch2.get_min_item().get_value() == 1);
109
+ REQUIRE(sketch2.get_max_item().get_value() == 18);
110
110
  }
111
111
 
112
112
  SECTION("serialize deserialize") {
113
- kll_test_type_sketch sketch1(200, 0);
113
+ kll_test_type_sketch sketch1(200, test_type_less(), 0);
114
114
 
115
115
  const int n = 1000;
116
116
  for (int i = 0; i < n; i++) sketch1.update(i);
117
117
 
118
118
  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
119
- sketch1.serialize(s);
120
- REQUIRE((size_t) s.tellp() == sketch1.get_serialized_size_bytes());
121
- auto sketch2 = kll_test_type_sketch::deserialize(s, alloc(0));
122
- REQUIRE((size_t) s.tellg() == sketch2.get_serialized_size_bytes());
119
+ sketch1.serialize(s, test_type_serde());
120
+ REQUIRE((size_t) s.tellp() == sketch1.get_serialized_size_bytes(test_type_serde()));
121
+ auto sketch2 = kll_test_type_sketch::deserialize(s, test_type_serde(), test_type_less(), 0);
122
+ REQUIRE((size_t) s.tellg() == sketch2.get_serialized_size_bytes(test_type_serde()));
123
123
  REQUIRE(s.tellg() == s.tellp());
124
124
  REQUIRE(sketch2.is_empty() == sketch1.is_empty());
125
125
  REQUIRE(sketch2.is_estimation_mode() == sketch1.is_estimation_mode());
126
126
  REQUIRE(sketch2.get_n() == sketch1.get_n());
127
127
  REQUIRE(sketch2.get_num_retained() == sketch1.get_num_retained());
128
- REQUIRE(sketch2.get_min_value().get_value() == sketch1.get_min_value().get_value());
129
- REQUIRE(sketch2.get_max_value().get_value() == sketch1.get_max_value().get_value());
128
+ REQUIRE(sketch2.get_min_item().get_value() == sketch1.get_min_item().get_value());
129
+ REQUIRE(sketch2.get_max_item().get_value() == sketch1.get_max_item().get_value());
130
130
  REQUIRE(sketch2.get_normalized_rank_error(false) == sketch1.get_normalized_rank_error(false));
131
131
  REQUIRE(sketch2.get_normalized_rank_error(true) == sketch1.get_normalized_rank_error(true));
132
132
  REQUIRE(sketch2.get_quantile(0.5).get_value() == sketch1.get_quantile(0.5).get_value());
@@ -136,13 +136,13 @@ TEST_CASE("kll sketch custom type", "[kll_sketch]") {
136
136
  }
137
137
 
138
138
  SECTION("moving merge") {
139
- kll_test_type_sketch sketch1(8, 0);
139
+ kll_test_type_sketch sketch1(8, test_type_less(), 0);
140
140
  for (int i = 0; i < 10; i++) sketch1.update(i);
141
- kll_test_type_sketch sketch2(8, 0);
141
+ kll_test_type_sketch sketch2(8, test_type_less(), 0);
142
142
  sketch2.update(10);
143
143
  sketch2.merge(std::move(sketch1));
144
- REQUIRE(sketch2.get_min_value().get_value() == 0);
145
- REQUIRE(sketch2.get_max_value().get_value() == 10);
144
+ REQUIRE(sketch2.get_min_item().get_value() == 0);
145
+ REQUIRE(sketch2.get_max_item().get_value() == 10);
146
146
  REQUIRE(sketch2.get_n() == 11);
147
147
  }
148
148