datasketches 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/datasketches/cpc_wrapper.cpp +12 -13
  4. data/ext/datasketches/ext.cpp +1 -1
  5. data/ext/datasketches/ext.h +4 -0
  6. data/ext/datasketches/extconf.rb +1 -1
  7. data/ext/datasketches/fi_wrapper.cpp +6 -8
  8. data/ext/datasketches/hll_wrapper.cpp +13 -14
  9. data/ext/datasketches/kll_wrapper.cpp +28 -76
  10. data/ext/datasketches/theta_wrapper.cpp +27 -41
  11. data/ext/datasketches/vo_wrapper.cpp +4 -6
  12. data/lib/datasketches/version.rb +1 -1
  13. data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
  14. data/vendor/datasketches-cpp/README.md +4 -4
  15. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +7 -0
  16. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +12 -0
  17. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +24 -0
  18. data/vendor/datasketches-cpp/common/test/integration_test.cpp +77 -0
  19. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +9 -1
  20. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +3 -0
  21. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +2 -2
  22. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +28 -19
  23. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +8 -5
  24. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +19 -14
  25. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +2 -2
  26. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +6 -6
  27. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +0 -6
  28. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +3 -3
  29. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -3
  30. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +9 -9
  31. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -0
  32. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +237 -0
  33. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +15 -10
  34. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +40 -28
  35. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +19 -13
  36. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +140 -124
  37. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +15 -12
  38. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
  39. data/vendor/datasketches-cpp/hll/CMakeLists.txt +3 -0
  40. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +32 -57
  41. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +9 -8
  42. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +2 -2
  43. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +34 -48
  44. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +10 -10
  45. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +45 -77
  46. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +11 -12
  47. data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +2 -2
  48. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +2 -2
  49. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +15 -14
  50. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +1 -1
  51. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +10 -21
  52. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +2 -3
  53. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +10 -21
  54. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +2 -3
  55. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +28 -55
  56. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +8 -8
  57. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +9 -11
  58. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +2 -1
  59. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +34 -31
  60. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +3 -28
  61. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +1 -1
  62. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +1 -1
  63. data/vendor/datasketches-cpp/hll/include/hll.hpp +6 -34
  64. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +7 -7
  65. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +2 -2
  66. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +3 -3
  67. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +2 -2
  68. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +46 -50
  69. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +1 -1
  70. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +3 -3
  71. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +10 -3
  72. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +93 -75
  73. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +11 -10
  74. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +45 -42
  75. data/vendor/datasketches-cpp/python/CMakeLists.txt +2 -0
  76. data/vendor/datasketches-cpp/python/README.md +6 -3
  77. data/vendor/datasketches-cpp/python/src/datasketches.cpp +2 -0
  78. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -2
  79. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +3 -1
  80. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +246 -0
  81. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +36 -26
  82. data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -1
  83. data/vendor/datasketches-cpp/python/tests/kll_test.py +3 -3
  84. data/vendor/datasketches-cpp/python/tests/req_test.py +126 -0
  85. data/vendor/datasketches-cpp/python/tests/theta_test.py +28 -3
  86. data/vendor/datasketches-cpp/req/CMakeLists.txt +60 -0
  87. data/vendor/datasketches-cpp/{tuple/include/theta_a_not_b_experimental_impl.hpp → req/include/req_common.hpp} +17 -8
  88. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +137 -0
  89. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +501 -0
  90. data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +69 -0
  91. data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +60 -0
  92. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +395 -0
  93. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +810 -0
  94. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +43 -0
  95. data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
  96. data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
  97. data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
  98. data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
  99. data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
  100. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +128 -0
  101. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +494 -0
  102. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +10 -9
  103. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +82 -70
  104. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +5 -5
  105. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +7 -7
  106. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -0
  107. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +96 -0
  108. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -31
  109. data/vendor/datasketches-cpp/setup.py +5 -3
  110. data/vendor/datasketches-cpp/theta/CMakeLists.txt +30 -3
  111. data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_sampled_sets.hpp +2 -1
  112. data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_theta_sketched_sets.hpp +1 -1
  113. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +12 -29
  114. data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +5 -46
  115. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_comparators.hpp +0 -0
  116. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_constants.hpp +2 -0
  117. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_helpers.hpp +0 -0
  118. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +22 -29
  119. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base.hpp +0 -0
  120. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base_impl.hpp +0 -0
  121. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +8 -90
  122. data/vendor/datasketches-cpp/{tuple/test/theta_union_experimental_test.cpp → theta/include/theta_jaccard_similarity.hpp} +11 -18
  123. data/vendor/datasketches-cpp/{tuple/include/jaccard_similarity.hpp → theta/include/theta_jaccard_similarity_base.hpp} +6 -22
  124. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base.hpp +0 -0
  125. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base_impl.hpp +5 -0
  126. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +132 -266
  127. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +200 -650
  128. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +27 -60
  129. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base.hpp +1 -1
  130. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base_impl.hpp +5 -0
  131. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +13 -69
  132. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base.hpp +3 -19
  133. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base_impl.hpp +6 -1
  134. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  135. data/vendor/datasketches-cpp/{tuple → theta}/test/theta_jaccard_similarity_test.cpp +2 -3
  136. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +37 -234
  137. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +3 -35
  138. data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +38 -0
  139. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +28 -13
  140. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +6 -6
  141. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +1 -6
  142. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -4
  143. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -4
  144. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +2 -1
  145. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +2 -2
  146. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +1 -4
  147. metadata +43 -34
  148. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +0 -53
  149. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +0 -78
  150. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +0 -43
  151. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +0 -393
  152. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +0 -481
  153. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +0 -88
  154. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +0 -47
  155. data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +0 -250
  156. data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
  157. data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
  158. data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
  159. data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +0 -224
  160. data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +0 -247
@@ -26,7 +26,8 @@
26
26
 
27
27
  namespace datasketches {
28
28
 
29
- typedef kll_sketch<test_type, test_type_less, test_type_serde, test_allocator<test_type>> kll_test_type_sketch;
29
+ using kll_test_type_sketch = kll_sketch<test_type, test_type_less, test_type_serde, test_allocator<test_type>>;
30
+ using alloc = test_allocator<test_type>;
30
31
 
31
32
  TEST_CASE("kll sketch custom type", "[kll_sketch]") {
32
33
 
@@ -34,7 +35,7 @@ TEST_CASE("kll sketch custom type", "[kll_sketch]") {
34
35
  test_allocator_total_bytes = 0;
35
36
 
36
37
  SECTION("compact level zero") {
37
- kll_test_type_sketch sketch(8);
38
+ kll_test_type_sketch sketch(8, 0);
38
39
  REQUIRE_THROWS_AS(sketch.get_quantile(0), std::runtime_error);
39
40
  REQUIRE_THROWS_AS(sketch.get_min_value(), std::runtime_error);
40
41
  REQUIRE_THROWS_AS(sketch.get_max_value(), std::runtime_error);
@@ -59,10 +60,10 @@ TEST_CASE("kll sketch custom type", "[kll_sketch]") {
59
60
  }
60
61
 
61
62
  SECTION("merge small") {
62
- kll_test_type_sketch sketch1(8);
63
+ kll_test_type_sketch sketch1(8, 0);
63
64
  sketch1.update(1);
64
65
 
65
- kll_test_type_sketch sketch2(8);
66
+ kll_test_type_sketch sketch2(8, 0);
66
67
  sketch2.update(2);
67
68
 
68
69
  sketch2.merge(sketch1);
@@ -76,7 +77,7 @@ TEST_CASE("kll sketch custom type", "[kll_sketch]") {
76
77
  }
77
78
 
78
79
  SECTION("merge higher levels") {
79
- kll_test_type_sketch sketch1(8);
80
+ kll_test_type_sketch sketch1(8, 0);
80
81
  sketch1.update(1);
81
82
  sketch1.update(2);
82
83
  sketch1.update(3);
@@ -87,7 +88,7 @@ TEST_CASE("kll sketch custom type", "[kll_sketch]") {
87
88
  sketch1.update(8);
88
89
  sketch1.update(9);
89
90
 
90
- kll_test_type_sketch sketch2(8);
91
+ kll_test_type_sketch sketch2(8, 0);
91
92
  sketch2.update(10);
92
93
  sketch2.update(11);
93
94
  sketch2.update(12);
@@ -109,7 +110,7 @@ TEST_CASE("kll sketch custom type", "[kll_sketch]") {
109
110
  }
110
111
 
111
112
  SECTION("serialize deserialize") {
112
- kll_test_type_sketch sketch1;
113
+ kll_test_type_sketch sketch1(200, 0);
113
114
 
114
115
  const int n = 1000;
115
116
  for (int i = 0; i < n; i++) sketch1.update(i);
@@ -117,7 +118,7 @@ TEST_CASE("kll sketch custom type", "[kll_sketch]") {
117
118
  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
118
119
  sketch1.serialize(s);
119
120
  REQUIRE((size_t) s.tellp() == sketch1.get_serialized_size_bytes());
120
- auto sketch2 = kll_test_type_sketch::deserialize(s);
121
+ auto sketch2 = kll_test_type_sketch::deserialize(s, alloc(0));
121
122
  REQUIRE((size_t) s.tellg() == sketch2.get_serialized_size_bytes());
122
123
  REQUIRE(s.tellg() == s.tellp());
123
124
  REQUIRE(sketch2.is_empty() == sketch1.is_empty());
@@ -135,9 +136,9 @@ TEST_CASE("kll sketch custom type", "[kll_sketch]") {
135
136
  }
136
137
 
137
138
  SECTION("moving merge") {
138
- kll_test_type_sketch sketch1(8);
139
+ kll_test_type_sketch sketch1(8, 0);
139
140
  for (int i = 0; i < 10; i++) sketch1.update(i);
140
- kll_test_type_sketch sketch2(8);
141
+ kll_test_type_sketch sketch2(8, 0);
141
142
  sketch2.update(10);
142
143
  sketch2.merge(std::move(sketch1));
143
144
  REQUIRE(sketch2.get_min_value().get_value() == 0);
@@ -48,14 +48,14 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
48
48
  test_allocator_total_bytes = 0;
49
49
 
50
50
  SECTION("k limits") {
51
- kll_float_sketch sketch1(kll_float_sketch::MIN_K); // this should work
52
- kll_float_sketch sketch2(kll_float_sketch::MAX_K); // this should work
53
- REQUIRE_THROWS_AS(new kll_float_sketch(kll_float_sketch::MIN_K - 1), std::invalid_argument);
51
+ kll_float_sketch sketch1(kll_float_sketch::MIN_K, 0); // this should work
52
+ kll_float_sketch sketch2(kll_float_sketch::MAX_K, 0); // this should work
53
+ REQUIRE_THROWS_AS(new kll_float_sketch(kll_float_sketch::MIN_K - 1, 0), std::invalid_argument);
54
54
  // MAX_K + 1 makes no sense because k is uint16_t
55
55
  }
56
56
 
57
57
  SECTION("empty") {
58
- kll_float_sketch sketch;
58
+ kll_float_sketch sketch(200, 0);
59
59
  REQUIRE(sketch.is_empty());
60
60
  REQUIRE_FALSE(sketch.is_estimation_mode());
61
61
  REQUIRE(sketch.get_n() == 0);
@@ -79,13 +79,13 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
79
79
  }
80
80
 
81
81
  SECTION("get bad quantile") {
82
- kll_float_sketch sketch;
82
+ kll_float_sketch sketch(200, 0);
83
83
  sketch.update(0); // has to be non-empty to reach the check
84
84
  REQUIRE_THROWS_AS(sketch.get_quantile(-1), std::invalid_argument);
85
85
  }
86
86
 
87
87
  SECTION("one item") {
88
- kll_float_sketch sketch;
88
+ kll_float_sketch sketch(200, 0);
89
89
  sketch.update(1);
90
90
  REQUIRE_FALSE(sketch.is_empty());
91
91
  REQUIRE_FALSE(sketch.is_estimation_mode());
@@ -112,7 +112,7 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
112
112
  }
113
113
 
114
114
  SECTION("NaN") {
115
- kll_float_sketch sketch;
115
+ kll_float_sketch sketch(200, 0);
116
116
  sketch.update(std::numeric_limits<float>::quiet_NaN());
117
117
  REQUIRE(sketch.is_empty());
118
118
 
@@ -122,7 +122,7 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
122
122
  }
123
123
 
124
124
  SECTION("many items, exact mode") {
125
- kll_float_sketch sketch;
125
+ kll_float_sketch sketch(200, 0);
126
126
  const uint32_t n(200);
127
127
  for (uint32_t i = 0; i < n; i++) {
128
128
  sketch.update(i);
@@ -157,7 +157,7 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
157
157
  }
158
158
 
159
159
  SECTION("10 items") {
160
- kll_float_sketch sketch;
160
+ kll_float_sketch sketch(200, 0);
161
161
  sketch.update(1);
162
162
  sketch.update(2);
163
163
  sketch.update(3);
@@ -175,7 +175,7 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
175
175
  }
176
176
 
177
177
  SECTION("100 items") {
178
- kll_float_sketch sketch;
178
+ kll_float_sketch sketch(200, 0);
179
179
  for (int i = 0; i < 100; ++i) sketch.update(i);
180
180
  REQUIRE(sketch.get_quantile(0) == 0);
181
181
  REQUIRE(sketch.get_quantile(0.01) == 1);
@@ -185,7 +185,7 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
185
185
  }
186
186
 
187
187
  SECTION("many items, estimation mode") {
188
- kll_float_sketch sketch;
188
+ kll_float_sketch sketch(200, 0);
189
189
  const int n(1000000);
190
190
  for (int i = 0; i < n; i++) {
191
191
  sketch.update(i);
@@ -227,7 +227,7 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
227
227
  }
228
228
 
229
229
  SECTION("consistency between get_rank adn get_PMF/CDF") {
230
- kll_float_sketch sketch;
230
+ kll_float_sketch sketch(200, 0);
231
231
  const int n = 1000;
232
232
  float values[n];
233
233
  for (int i = 0; i < n; i++) {
@@ -256,7 +256,7 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
256
256
  std::ifstream is;
257
257
  is.exceptions(std::ios::failbit | std::ios::badbit);
258
258
  is.open(testBinaryInputPath + "kll_sketch_from_java.sk", std::ios::binary);
259
- auto sketch = kll_float_sketch::deserialize(is);
259
+ auto sketch = kll_float_sketch::deserialize(is, test_allocator<float>(0));
260
260
  REQUIRE_FALSE(sketch.is_empty());
261
261
  REQUIRE(sketch.is_estimation_mode());
262
262
  REQUIRE(sketch.get_n() == 1000000);
@@ -266,11 +266,11 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
266
266
  }
267
267
 
268
268
  SECTION("stream serialize deserialize empty") {
269
- kll_float_sketch sketch;
269
+ kll_float_sketch sketch(200, 0);
270
270
  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
271
271
  sketch.serialize(s);
272
272
  REQUIRE(static_cast<size_t>(s.tellp()) == sketch.get_serialized_size_bytes());
273
- auto sketch2 = kll_float_sketch::deserialize(s);
273
+ auto sketch2 = kll_float_sketch::deserialize(s, test_allocator<float>(0));
274
274
  REQUIRE(static_cast<size_t>(s.tellp()) == sketch2.get_serialized_size_bytes());
275
275
  REQUIRE(sketch2.is_empty() == sketch.is_empty());
276
276
  REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
@@ -283,9 +283,9 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
283
283
  }
284
284
 
285
285
  SECTION("bytes serialize deserialize empty") {
286
- kll_float_sketch sketch;
286
+ kll_float_sketch sketch(200, 0);
287
287
  auto bytes = sketch.serialize();
288
- auto sketch2 = kll_float_sketch::deserialize(bytes.data(), bytes.size());
288
+ auto sketch2 = kll_float_sketch::deserialize(bytes.data(), bytes.size(), 0);
289
289
  REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
290
290
  REQUIRE(sketch2.is_empty() == sketch.is_empty());
291
291
  REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
@@ -298,12 +298,12 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
298
298
  }
299
299
 
300
300
  SECTION("serialize deserialize one item") {
301
- kll_float_sketch sketch;
301
+ kll_float_sketch sketch(200, 0);
302
302
  sketch.update(1);
303
303
  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
304
304
  sketch.serialize(s);
305
305
  REQUIRE(static_cast<size_t>(s.tellp()) == sketch.get_serialized_size_bytes());
306
- auto sketch2 = kll_float_sketch::deserialize(s);
306
+ auto sketch2 = kll_float_sketch::deserialize(s, test_allocator<float>(0));
307
307
  REQUIRE(static_cast<size_t>(s.tellp()) == sketch2.get_serialized_size_bytes());
308
308
  REQUIRE(s.tellg() == s.tellp());
309
309
  REQUIRE_FALSE(sketch2.is_empty());
@@ -321,7 +321,7 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
321
321
  std::ifstream is;
322
322
  is.exceptions(std::ios::failbit | std::ios::badbit);
323
323
  is.open(testBinaryInputPath + "kll_sketch_float_one_item_v1.sk", std::ios::binary);
324
- auto sketch = kll_float_sketch::deserialize(is);
324
+ auto sketch = kll_float_sketch::deserialize(is, test_allocator<float>(0));
325
325
  REQUIRE_FALSE(sketch.is_empty());
326
326
  REQUIRE_FALSE(sketch.is_estimation_mode());
327
327
  REQUIRE(sketch.get_n() == 1);
@@ -331,13 +331,13 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
331
331
  }
332
332
 
333
333
  SECTION("stream serialize deserialize many floats") {
334
- kll_float_sketch sketch;
334
+ kll_float_sketch sketch(200, 0);
335
335
  const int n(1000);
336
336
  for (int i = 0; i < n; i++) sketch.update(i);
337
337
  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
338
338
  sketch.serialize(s);
339
339
  REQUIRE(static_cast<size_t>(s.tellp()) == sketch.get_serialized_size_bytes());
340
- auto sketch2 = kll_float_sketch::deserialize(s);
340
+ auto sketch2 = kll_float_sketch::deserialize(s, test_allocator<float>(0));
341
341
  REQUIRE(static_cast<size_t>(s.tellp()) == sketch2.get_serialized_size_bytes());
342
342
  REQUIRE(s.tellg() == s.tellp());
343
343
  REQUIRE(sketch2.is_empty() == sketch.is_empty());
@@ -354,12 +354,12 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
354
354
  }
355
355
 
356
356
  SECTION("bytes serialize deserialize many floats") {
357
- kll_float_sketch sketch;
357
+ kll_float_sketch sketch(200, 0);
358
358
  const int n(1000);
359
359
  for (int i = 0; i < n; i++) sketch.update(i);
360
360
  auto bytes = sketch.serialize();
361
361
  REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
362
- auto sketch2 = kll_float_sketch::deserialize(bytes.data(), bytes.size());
362
+ auto sketch2 = kll_float_sketch::deserialize(bytes.data(), bytes.size(), 0);
363
363
  REQUIRE(bytes.size() == sketch2.get_serialized_size_bytes());
364
364
  REQUIRE(sketch2.is_empty() == sketch.is_empty());
365
365
  REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
@@ -414,7 +414,7 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
414
414
  }
415
415
 
416
416
  SECTION("out of order split points, float") {
417
- kll_float_sketch sketch;
417
+ kll_float_sketch sketch(200, 0);
418
418
  sketch.update(0); // has too be non-empty to reach the check
419
419
  float split_points[2] = {1, 0};
420
420
  REQUIRE_THROWS_AS(sketch.get_CDF(split_points, 2), std::invalid_argument);
@@ -428,15 +428,15 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
428
428
  }
429
429
 
430
430
  SECTION("NaN split point") {
431
- kll_float_sketch sketch;
431
+ kll_float_sketch sketch(200, 0);
432
432
  sketch.update(0); // has too be non-empty to reach the check
433
433
  float split_points[1] = {std::numeric_limits<float>::quiet_NaN()};
434
434
  REQUIRE_THROWS_AS(sketch.get_CDF(split_points, 1), std::invalid_argument);
435
435
  }
436
436
 
437
437
  SECTION("merge") {
438
- kll_float_sketch sketch1;
439
- kll_float_sketch sketch2;
438
+ kll_float_sketch sketch1(200, 0);
439
+ kll_float_sketch sketch2(200, 0);
440
440
  const int n = 10000;
441
441
  for (int i = 0; i < n; i++) {
442
442
  sketch1.update(i);
@@ -458,8 +458,8 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
458
458
  }
459
459
 
460
460
  SECTION("merge lower k") {
461
- kll_float_sketch sketch1(256);
462
- kll_float_sketch sketch2(128);
461
+ kll_float_sketch sketch1(256, 0);
462
+ kll_float_sketch sketch2(128, 0);
463
463
  const int n = 10000;
464
464
  for (int i = 0; i < n; i++) {
465
465
  sketch1.update(i);
@@ -471,6 +471,9 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
471
471
  REQUIRE(sketch2.get_min_value() == n);
472
472
  REQUIRE(sketch2.get_max_value() == 2.0f * n - 1);
473
473
 
474
+ REQUIRE(sketch1.get_k() == 256);
475
+ REQUIRE(sketch2.get_k() == 128);
476
+
474
477
  REQUIRE(sketch1.get_normalized_rank_error(false) < sketch2.get_normalized_rank_error(false));
475
478
  REQUIRE(sketch1.get_normalized_rank_error(true) < sketch2.get_normalized_rank_error(true));
476
479
 
@@ -488,8 +491,8 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
488
491
  }
489
492
 
490
493
  SECTION("merge exact mode, lower k") {
491
- kll_float_sketch sketch1(256);
492
- kll_float_sketch sketch2(128);
494
+ kll_float_sketch sketch1(256, 0);
495
+ kll_float_sketch sketch2(128, 0);
493
496
  const int n = 10000;
494
497
  for (int i = 0; i < n; i++) {
495
498
  sketch1.update(i);
@@ -513,8 +516,8 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
513
516
  }
514
517
 
515
518
  SECTION("merge min value from other") {
516
- kll_float_sketch sketch1;
517
- kll_float_sketch sketch2;
519
+ kll_float_sketch sketch1(200, 0);
520
+ kll_float_sketch sketch2(200, 0);
518
521
  sketch1.update(1);
519
522
  sketch2.update(2);
520
523
  sketch2.merge(sketch1);
@@ -523,9 +526,9 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
523
526
  }
524
527
 
525
528
  SECTION("merge min and max values from other") {
526
- kll_float_sketch sketch1;
529
+ kll_float_sketch sketch1(200, 0);
527
530
  for (int i = 0; i < 1000000; i++) sketch1.update(i);
528
- kll_float_sketch sketch2;
531
+ kll_float_sketch sketch2(200, 0);
529
532
  sketch2.merge(sketch1);
530
533
  REQUIRE(sketch2.get_min_value() == 0.0f);
531
534
  REQUIRE(sketch2.get_max_value() == 999999.0f);
@@ -560,7 +563,7 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
560
563
  }
561
564
 
562
565
  SECTION("sketch of strings stream") {
563
- kll_string_sketch sketch1;
566
+ kll_string_sketch sketch1(200, 0);
564
567
  REQUIRE_THROWS_AS(sketch1.get_quantile(0), std::runtime_error);
565
568
  REQUIRE_THROWS_AS(sketch1.get_min_value(), std::runtime_error);
566
569
  REQUIRE_THROWS_AS(sketch1.get_max_value(), std::runtime_error);
@@ -575,7 +578,7 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
575
578
  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
576
579
  sketch1.serialize(s);
577
580
  REQUIRE(static_cast<size_t>(s.tellp()) == sketch1.get_serialized_size_bytes());
578
- auto sketch2 = kll_string_sketch::deserialize(s);
581
+ auto sketch2 = kll_string_sketch::deserialize(s, test_allocator<std::string>(0));
579
582
  REQUIRE(static_cast<size_t>(s.tellp()) == sketch2.get_serialized_size_bytes());
580
583
  REQUIRE(s.tellg() == s.tellp());
581
584
  REQUIRE(sketch2.is_empty() == sketch1.is_empty());
@@ -599,7 +602,7 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
599
602
  }
600
603
 
601
604
  SECTION("sketch of strings bytes") {
602
- kll_string_sketch sketch1;
605
+ kll_string_sketch sketch1(200, 0);
603
606
  REQUIRE_THROWS_AS(sketch1.get_quantile(0), std::runtime_error);
604
607
  REQUIRE_THROWS_AS(sketch1.get_min_value(), std::runtime_error);
605
608
  REQUIRE_THROWS_AS(sketch1.get_max_value(), std::runtime_error);
@@ -613,7 +616,7 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
613
616
 
614
617
  auto bytes = sketch1.serialize();
615
618
  REQUIRE(bytes.size() == sketch1.get_serialized_size_bytes());
616
- auto sketch2 = kll_string_sketch::deserialize(bytes.data(), bytes.size());
619
+ auto sketch2 = kll_string_sketch::deserialize(bytes.data(), bytes.size(), 0);
617
620
  REQUIRE(bytes.size() == sketch2.get_serialized_size_bytes());
618
621
  REQUIRE(sketch2.is_empty() == sketch1.is_empty());
619
622
  REQUIRE(sketch2.is_estimation_mode() == sketch1.is_estimation_mode());
@@ -630,11 +633,11 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
630
633
 
631
634
 
632
635
  SECTION("sketch of strings, single item, bytes") {
633
- kll_string_sketch sketch1;
636
+ kll_string_sketch sketch1(200, 0);
634
637
  sketch1.update("a");
635
638
  auto bytes = sketch1.serialize();
636
639
  REQUIRE(bytes.size() == sketch1.get_serialized_size_bytes());
637
- auto sketch2 = kll_string_sketch::deserialize(bytes.data(), bytes.size());
640
+ auto sketch2 = kll_string_sketch::deserialize(bytes.data(), bytes.size(), 0);
638
641
  REQUIRE(bytes.size() == sketch2.get_serialized_size_bytes());
639
642
  }
640
643
 
@@ -35,6 +35,7 @@ target_link_libraries(python
35
35
  fi
36
36
  theta
37
37
  sampling
38
+ req
38
39
  pybind11::module
39
40
  )
40
41
 
@@ -57,5 +58,6 @@ target_sources(python
57
58
  src/fi_wrapper.cpp
58
59
  src/theta_wrapper.cpp
59
60
  src/vo_wrapper.cpp
61
+ src/req_wrapper.cpp
60
62
  src/vector_of_kll.cpp
61
63
  )
@@ -1,4 +1,4 @@
1
- # Python Wrapper for Datasketches
1
+ # Python Wrapper for Apache DataSketches
2
2
 
3
3
  ## Installation
4
4
 
@@ -39,13 +39,16 @@ tox
39
39
 
40
40
  ## Usage
41
41
 
42
- Having installed the library, loading the Datasketches library in Python is simple: `import datasketches`.
42
+ Having installed the library, loading the Apache Datasketches library in Python is simple: `import datasketches`.
43
43
 
44
44
  ## Available Sketch Classes
45
45
 
46
- - KLL
46
+ - KLL (Absolute Error Quantiles)
47
47
  - `kll_ints_sketch`
48
48
  - `kll_floats_sketch`
49
+ - REQ (Relative Error Quantiles)
50
+ - `req_ints_sketch`
51
+ - `req_floats_sketch`
49
52
  - Frequent Items
50
53
  - `frequent_strings_sketch`
51
54
  - Error types are `frequent_items_error_type.{NO_FALSE_NEGATIVES | NO_FALSE_POSITIVES}`
@@ -27,6 +27,7 @@ void init_fi(py::module& m);
27
27
  void init_cpc(py::module& m);
28
28
  void init_theta(py::module& m);
29
29
  void init_vo(py::module& m);
30
+ void init_req(py::module& m);
30
31
  void init_vector_of_kll(py::module& m);
31
32
 
32
33
  PYBIND11_MODULE(datasketches, m) {
@@ -36,5 +37,6 @@ PYBIND11_MODULE(datasketches, m) {
36
37
  init_cpc(m);
37
38
  init_theta(m);
38
39
  init_vo(m);
40
+ init_req(m);
39
41
  init_vector_of_kll(m);
40
42
  }
@@ -113,8 +113,6 @@ void init_hll(py::module &m) {
113
113
  "Returns the approximate lower error bound given the specified number of standard deviations in {1, 2, 3}")
114
114
  .def("get_upper_bound", &hll_union::get_upper_bound, py::arg("num_std_devs"),
115
115
  "Returns the approximate upper error bound given the specified number of standard deviations in {1, 2, 3}")
116
- .def("is_compact", &hll_union::is_compact,
117
- "True if the union is compact, otherwise False")
118
116
  .def("is_empty", &hll_union::is_empty,
119
117
  "True if the union is empty, otherwise False")
120
118
  .def("reset", &hll_union::reset,
@@ -130,6 +130,8 @@ void bind_kll_sketch(py::module &m, const char* name) {
130
130
  "Produces a string summary of the sketch")
131
131
  .def("is_empty", &kll_sketch<T>::is_empty,
132
132
  "Returns True if the sketch is empty, otherwise False")
133
+ .def("get_k", &kll_sketch<T>::get_k,
134
+ "Returns the configured parameter k")
133
135
  .def("get_n", &kll_sketch<T>::get_n,
134
136
  "Returns the length of the input stream")
135
137
  .def("get_num_retained", &kll_sketch<T>::get_num_retained,
@@ -198,7 +200,7 @@ void bind_kll_sketch(py::module &m, const char* name) {
198
200
  "If pmf is True, returns the 'double-sided' normalized rank error for the get_PMF() function.\n"
199
201
  "Otherwise, it is the 'single-sided' normalized rank error for all the other queries.\n"
200
202
  "Constants were derived as the best fit to 99 percentile empirically measured max error in thousands of trials")
201
- .def("serialize", &dspy::kll_sketch_serialize<T>, "Serailizes the sketch into a bytes object")
203
+ .def("serialize", &dspy::kll_sketch_serialize<T>, "Serializes the sketch into a bytes object")
202
204
  .def_static("deserialize", &dspy::kll_sketch_deserialize<T>, "Deserializes the sketch from a bytes object")
203
205
  ;
204
206
  }