datasketches 0.1.2 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (160) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/datasketches/cpc_wrapper.cpp +12 -13
  4. data/ext/datasketches/ext.cpp +1 -1
  5. data/ext/datasketches/ext.h +4 -0
  6. data/ext/datasketches/extconf.rb +1 -1
  7. data/ext/datasketches/fi_wrapper.cpp +6 -8
  8. data/ext/datasketches/hll_wrapper.cpp +13 -14
  9. data/ext/datasketches/kll_wrapper.cpp +28 -76
  10. data/ext/datasketches/theta_wrapper.cpp +27 -41
  11. data/ext/datasketches/vo_wrapper.cpp +4 -6
  12. data/lib/datasketches/version.rb +1 -1
  13. data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
  14. data/vendor/datasketches-cpp/README.md +4 -4
  15. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +7 -0
  16. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +12 -0
  17. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +24 -0
  18. data/vendor/datasketches-cpp/common/test/integration_test.cpp +77 -0
  19. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +9 -1
  20. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +3 -0
  21. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +2 -2
  22. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +28 -19
  23. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +8 -5
  24. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +19 -14
  25. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +2 -2
  26. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +6 -6
  27. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +0 -6
  28. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +3 -3
  29. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -3
  30. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +9 -9
  31. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -0
  32. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +237 -0
  33. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +15 -10
  34. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +40 -28
  35. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +19 -13
  36. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +140 -124
  37. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +15 -12
  38. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
  39. data/vendor/datasketches-cpp/hll/CMakeLists.txt +3 -0
  40. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +32 -57
  41. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +9 -8
  42. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +2 -2
  43. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +34 -48
  44. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +10 -10
  45. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +45 -77
  46. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +11 -12
  47. data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +2 -2
  48. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +2 -2
  49. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +15 -14
  50. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +1 -1
  51. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +10 -21
  52. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +2 -3
  53. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +10 -21
  54. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +2 -3
  55. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +28 -55
  56. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +8 -8
  57. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +9 -11
  58. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +2 -1
  59. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +34 -31
  60. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +3 -28
  61. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +1 -1
  62. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +1 -1
  63. data/vendor/datasketches-cpp/hll/include/hll.hpp +6 -34
  64. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +7 -7
  65. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +2 -2
  66. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +3 -3
  67. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +2 -2
  68. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +46 -50
  69. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +1 -1
  70. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +3 -3
  71. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +10 -3
  72. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +93 -75
  73. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +11 -10
  74. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +45 -42
  75. data/vendor/datasketches-cpp/python/CMakeLists.txt +2 -0
  76. data/vendor/datasketches-cpp/python/README.md +6 -3
  77. data/vendor/datasketches-cpp/python/src/datasketches.cpp +2 -0
  78. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -2
  79. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +3 -1
  80. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +246 -0
  81. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +36 -26
  82. data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -1
  83. data/vendor/datasketches-cpp/python/tests/kll_test.py +3 -3
  84. data/vendor/datasketches-cpp/python/tests/req_test.py +126 -0
  85. data/vendor/datasketches-cpp/python/tests/theta_test.py +28 -3
  86. data/vendor/datasketches-cpp/req/CMakeLists.txt +60 -0
  87. data/vendor/datasketches-cpp/{tuple/include/theta_a_not_b_experimental_impl.hpp → req/include/req_common.hpp} +17 -8
  88. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +137 -0
  89. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +501 -0
  90. data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +69 -0
  91. data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +60 -0
  92. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +395 -0
  93. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +810 -0
  94. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +43 -0
  95. data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
  96. data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
  97. data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
  98. data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
  99. data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
  100. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +128 -0
  101. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +494 -0
  102. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +10 -9
  103. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +82 -70
  104. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +5 -5
  105. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +7 -7
  106. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -0
  107. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +96 -0
  108. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -31
  109. data/vendor/datasketches-cpp/setup.py +5 -3
  110. data/vendor/datasketches-cpp/theta/CMakeLists.txt +30 -3
  111. data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_sampled_sets.hpp +2 -1
  112. data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_theta_sketched_sets.hpp +1 -1
  113. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +12 -29
  114. data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +5 -46
  115. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_comparators.hpp +0 -0
  116. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_constants.hpp +2 -0
  117. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_helpers.hpp +0 -0
  118. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +22 -29
  119. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base.hpp +0 -0
  120. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base_impl.hpp +0 -0
  121. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +8 -90
  122. data/vendor/datasketches-cpp/{tuple/test/theta_union_experimental_test.cpp → theta/include/theta_jaccard_similarity.hpp} +11 -18
  123. data/vendor/datasketches-cpp/{tuple/include/jaccard_similarity.hpp → theta/include/theta_jaccard_similarity_base.hpp} +6 -22
  124. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base.hpp +0 -0
  125. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base_impl.hpp +5 -0
  126. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +132 -266
  127. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +200 -650
  128. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +27 -60
  129. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base.hpp +1 -1
  130. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base_impl.hpp +5 -0
  131. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +13 -69
  132. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base.hpp +3 -19
  133. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base_impl.hpp +6 -1
  134. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  135. data/vendor/datasketches-cpp/{tuple → theta}/test/theta_jaccard_similarity_test.cpp +2 -3
  136. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +37 -234
  137. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +3 -35
  138. data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +38 -0
  139. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +28 -13
  140. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +6 -6
  141. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +1 -6
  142. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -4
  143. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -4
  144. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +2 -1
  145. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +2 -2
  146. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +1 -4
  147. metadata +43 -34
  148. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +0 -53
  149. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +0 -78
  150. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +0 -43
  151. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +0 -393
  152. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +0 -481
  153. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +0 -88
  154. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +0 -47
  155. data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +0 -250
  156. data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
  157. data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
  158. data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
  159. data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +0 -224
  160. data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +0 -247
@@ -26,7 +26,8 @@
26
26
 
27
27
  namespace datasketches {
28
28
 
29
- typedef kll_sketch<test_type, test_type_less, test_type_serde, test_allocator<test_type>> kll_test_type_sketch;
29
+ using kll_test_type_sketch = kll_sketch<test_type, test_type_less, test_type_serde, test_allocator<test_type>>;
30
+ using alloc = test_allocator<test_type>;
30
31
 
31
32
  TEST_CASE("kll sketch custom type", "[kll_sketch]") {
32
33
 
@@ -34,7 +35,7 @@ TEST_CASE("kll sketch custom type", "[kll_sketch]") {
34
35
  test_allocator_total_bytes = 0;
35
36
 
36
37
  SECTION("compact level zero") {
37
- kll_test_type_sketch sketch(8);
38
+ kll_test_type_sketch sketch(8, 0);
38
39
  REQUIRE_THROWS_AS(sketch.get_quantile(0), std::runtime_error);
39
40
  REQUIRE_THROWS_AS(sketch.get_min_value(), std::runtime_error);
40
41
  REQUIRE_THROWS_AS(sketch.get_max_value(), std::runtime_error);
@@ -59,10 +60,10 @@ TEST_CASE("kll sketch custom type", "[kll_sketch]") {
59
60
  }
60
61
 
61
62
  SECTION("merge small") {
62
- kll_test_type_sketch sketch1(8);
63
+ kll_test_type_sketch sketch1(8, 0);
63
64
  sketch1.update(1);
64
65
 
65
- kll_test_type_sketch sketch2(8);
66
+ kll_test_type_sketch sketch2(8, 0);
66
67
  sketch2.update(2);
67
68
 
68
69
  sketch2.merge(sketch1);
@@ -76,7 +77,7 @@ TEST_CASE("kll sketch custom type", "[kll_sketch]") {
76
77
  }
77
78
 
78
79
  SECTION("merge higher levels") {
79
- kll_test_type_sketch sketch1(8);
80
+ kll_test_type_sketch sketch1(8, 0);
80
81
  sketch1.update(1);
81
82
  sketch1.update(2);
82
83
  sketch1.update(3);
@@ -87,7 +88,7 @@ TEST_CASE("kll sketch custom type", "[kll_sketch]") {
87
88
  sketch1.update(8);
88
89
  sketch1.update(9);
89
90
 
90
- kll_test_type_sketch sketch2(8);
91
+ kll_test_type_sketch sketch2(8, 0);
91
92
  sketch2.update(10);
92
93
  sketch2.update(11);
93
94
  sketch2.update(12);
@@ -109,7 +110,7 @@ TEST_CASE("kll sketch custom type", "[kll_sketch]") {
109
110
  }
110
111
 
111
112
  SECTION("serialize deserialize") {
112
- kll_test_type_sketch sketch1;
113
+ kll_test_type_sketch sketch1(200, 0);
113
114
 
114
115
  const int n = 1000;
115
116
  for (int i = 0; i < n; i++) sketch1.update(i);
@@ -117,7 +118,7 @@ TEST_CASE("kll sketch custom type", "[kll_sketch]") {
117
118
  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
118
119
  sketch1.serialize(s);
119
120
  REQUIRE((size_t) s.tellp() == sketch1.get_serialized_size_bytes());
120
- auto sketch2 = kll_test_type_sketch::deserialize(s);
121
+ auto sketch2 = kll_test_type_sketch::deserialize(s, alloc(0));
121
122
  REQUIRE((size_t) s.tellg() == sketch2.get_serialized_size_bytes());
122
123
  REQUIRE(s.tellg() == s.tellp());
123
124
  REQUIRE(sketch2.is_empty() == sketch1.is_empty());
@@ -135,9 +136,9 @@ TEST_CASE("kll sketch custom type", "[kll_sketch]") {
135
136
  }
136
137
 
137
138
  SECTION("moving merge") {
138
- kll_test_type_sketch sketch1(8);
139
+ kll_test_type_sketch sketch1(8, 0);
139
140
  for (int i = 0; i < 10; i++) sketch1.update(i);
140
- kll_test_type_sketch sketch2(8);
141
+ kll_test_type_sketch sketch2(8, 0);
141
142
  sketch2.update(10);
142
143
  sketch2.merge(std::move(sketch1));
143
144
  REQUIRE(sketch2.get_min_value().get_value() == 0);
@@ -48,14 +48,14 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
48
48
  test_allocator_total_bytes = 0;
49
49
 
50
50
  SECTION("k limits") {
51
- kll_float_sketch sketch1(kll_float_sketch::MIN_K); // this should work
52
- kll_float_sketch sketch2(kll_float_sketch::MAX_K); // this should work
53
- REQUIRE_THROWS_AS(new kll_float_sketch(kll_float_sketch::MIN_K - 1), std::invalid_argument);
51
+ kll_float_sketch sketch1(kll_float_sketch::MIN_K, 0); // this should work
52
+ kll_float_sketch sketch2(kll_float_sketch::MAX_K, 0); // this should work
53
+ REQUIRE_THROWS_AS(new kll_float_sketch(kll_float_sketch::MIN_K - 1, 0), std::invalid_argument);
54
54
  // MAX_K + 1 makes no sense because k is uint16_t
55
55
  }
56
56
 
57
57
  SECTION("empty") {
58
- kll_float_sketch sketch;
58
+ kll_float_sketch sketch(200, 0);
59
59
  REQUIRE(sketch.is_empty());
60
60
  REQUIRE_FALSE(sketch.is_estimation_mode());
61
61
  REQUIRE(sketch.get_n() == 0);
@@ -79,13 +79,13 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
79
79
  }
80
80
 
81
81
  SECTION("get bad quantile") {
82
- kll_float_sketch sketch;
82
+ kll_float_sketch sketch(200, 0);
83
83
  sketch.update(0); // has to be non-empty to reach the check
84
84
  REQUIRE_THROWS_AS(sketch.get_quantile(-1), std::invalid_argument);
85
85
  }
86
86
 
87
87
  SECTION("one item") {
88
- kll_float_sketch sketch;
88
+ kll_float_sketch sketch(200, 0);
89
89
  sketch.update(1);
90
90
  REQUIRE_FALSE(sketch.is_empty());
91
91
  REQUIRE_FALSE(sketch.is_estimation_mode());
@@ -112,7 +112,7 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
112
112
  }
113
113
 
114
114
  SECTION("NaN") {
115
- kll_float_sketch sketch;
115
+ kll_float_sketch sketch(200, 0);
116
116
  sketch.update(std::numeric_limits<float>::quiet_NaN());
117
117
  REQUIRE(sketch.is_empty());
118
118
 
@@ -122,7 +122,7 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
122
122
  }
123
123
 
124
124
  SECTION("many items, exact mode") {
125
- kll_float_sketch sketch;
125
+ kll_float_sketch sketch(200, 0);
126
126
  const uint32_t n(200);
127
127
  for (uint32_t i = 0; i < n; i++) {
128
128
  sketch.update(i);
@@ -157,7 +157,7 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
157
157
  }
158
158
 
159
159
  SECTION("10 items") {
160
- kll_float_sketch sketch;
160
+ kll_float_sketch sketch(200, 0);
161
161
  sketch.update(1);
162
162
  sketch.update(2);
163
163
  sketch.update(3);
@@ -175,7 +175,7 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
175
175
  }
176
176
 
177
177
  SECTION("100 items") {
178
- kll_float_sketch sketch;
178
+ kll_float_sketch sketch(200, 0);
179
179
  for (int i = 0; i < 100; ++i) sketch.update(i);
180
180
  REQUIRE(sketch.get_quantile(0) == 0);
181
181
  REQUIRE(sketch.get_quantile(0.01) == 1);
@@ -185,7 +185,7 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
185
185
  }
186
186
 
187
187
  SECTION("many items, estimation mode") {
188
- kll_float_sketch sketch;
188
+ kll_float_sketch sketch(200, 0);
189
189
  const int n(1000000);
190
190
  for (int i = 0; i < n; i++) {
191
191
  sketch.update(i);
@@ -227,7 +227,7 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
227
227
  }
228
228
 
229
229
  SECTION("consistency between get_rank adn get_PMF/CDF") {
230
- kll_float_sketch sketch;
230
+ kll_float_sketch sketch(200, 0);
231
231
  const int n = 1000;
232
232
  float values[n];
233
233
  for (int i = 0; i < n; i++) {
@@ -256,7 +256,7 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
256
256
  std::ifstream is;
257
257
  is.exceptions(std::ios::failbit | std::ios::badbit);
258
258
  is.open(testBinaryInputPath + "kll_sketch_from_java.sk", std::ios::binary);
259
- auto sketch = kll_float_sketch::deserialize(is);
259
+ auto sketch = kll_float_sketch::deserialize(is, test_allocator<float>(0));
260
260
  REQUIRE_FALSE(sketch.is_empty());
261
261
  REQUIRE(sketch.is_estimation_mode());
262
262
  REQUIRE(sketch.get_n() == 1000000);
@@ -266,11 +266,11 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
266
266
  }
267
267
 
268
268
  SECTION("stream serialize deserialize empty") {
269
- kll_float_sketch sketch;
269
+ kll_float_sketch sketch(200, 0);
270
270
  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
271
271
  sketch.serialize(s);
272
272
  REQUIRE(static_cast<size_t>(s.tellp()) == sketch.get_serialized_size_bytes());
273
- auto sketch2 = kll_float_sketch::deserialize(s);
273
+ auto sketch2 = kll_float_sketch::deserialize(s, test_allocator<float>(0));
274
274
  REQUIRE(static_cast<size_t>(s.tellp()) == sketch2.get_serialized_size_bytes());
275
275
  REQUIRE(sketch2.is_empty() == sketch.is_empty());
276
276
  REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
@@ -283,9 +283,9 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
283
283
  }
284
284
 
285
285
  SECTION("bytes serialize deserialize empty") {
286
- kll_float_sketch sketch;
286
+ kll_float_sketch sketch(200, 0);
287
287
  auto bytes = sketch.serialize();
288
- auto sketch2 = kll_float_sketch::deserialize(bytes.data(), bytes.size());
288
+ auto sketch2 = kll_float_sketch::deserialize(bytes.data(), bytes.size(), 0);
289
289
  REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
290
290
  REQUIRE(sketch2.is_empty() == sketch.is_empty());
291
291
  REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
@@ -298,12 +298,12 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
298
298
  }
299
299
 
300
300
  SECTION("serialize deserialize one item") {
301
- kll_float_sketch sketch;
301
+ kll_float_sketch sketch(200, 0);
302
302
  sketch.update(1);
303
303
  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
304
304
  sketch.serialize(s);
305
305
  REQUIRE(static_cast<size_t>(s.tellp()) == sketch.get_serialized_size_bytes());
306
- auto sketch2 = kll_float_sketch::deserialize(s);
306
+ auto sketch2 = kll_float_sketch::deserialize(s, test_allocator<float>(0));
307
307
  REQUIRE(static_cast<size_t>(s.tellp()) == sketch2.get_serialized_size_bytes());
308
308
  REQUIRE(s.tellg() == s.tellp());
309
309
  REQUIRE_FALSE(sketch2.is_empty());
@@ -321,7 +321,7 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
321
321
  std::ifstream is;
322
322
  is.exceptions(std::ios::failbit | std::ios::badbit);
323
323
  is.open(testBinaryInputPath + "kll_sketch_float_one_item_v1.sk", std::ios::binary);
324
- auto sketch = kll_float_sketch::deserialize(is);
324
+ auto sketch = kll_float_sketch::deserialize(is, test_allocator<float>(0));
325
325
  REQUIRE_FALSE(sketch.is_empty());
326
326
  REQUIRE_FALSE(sketch.is_estimation_mode());
327
327
  REQUIRE(sketch.get_n() == 1);
@@ -331,13 +331,13 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
331
331
  }
332
332
 
333
333
  SECTION("stream serialize deserialize many floats") {
334
- kll_float_sketch sketch;
334
+ kll_float_sketch sketch(200, 0);
335
335
  const int n(1000);
336
336
  for (int i = 0; i < n; i++) sketch.update(i);
337
337
  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
338
338
  sketch.serialize(s);
339
339
  REQUIRE(static_cast<size_t>(s.tellp()) == sketch.get_serialized_size_bytes());
340
- auto sketch2 = kll_float_sketch::deserialize(s);
340
+ auto sketch2 = kll_float_sketch::deserialize(s, test_allocator<float>(0));
341
341
  REQUIRE(static_cast<size_t>(s.tellp()) == sketch2.get_serialized_size_bytes());
342
342
  REQUIRE(s.tellg() == s.tellp());
343
343
  REQUIRE(sketch2.is_empty() == sketch.is_empty());
@@ -354,12 +354,12 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
354
354
  }
355
355
 
356
356
  SECTION("bytes serialize deserialize many floats") {
357
- kll_float_sketch sketch;
357
+ kll_float_sketch sketch(200, 0);
358
358
  const int n(1000);
359
359
  for (int i = 0; i < n; i++) sketch.update(i);
360
360
  auto bytes = sketch.serialize();
361
361
  REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
362
- auto sketch2 = kll_float_sketch::deserialize(bytes.data(), bytes.size());
362
+ auto sketch2 = kll_float_sketch::deserialize(bytes.data(), bytes.size(), 0);
363
363
  REQUIRE(bytes.size() == sketch2.get_serialized_size_bytes());
364
364
  REQUIRE(sketch2.is_empty() == sketch.is_empty());
365
365
  REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
@@ -414,7 +414,7 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
414
414
  }
415
415
 
416
416
  SECTION("out of order split points, float") {
417
- kll_float_sketch sketch;
417
+ kll_float_sketch sketch(200, 0);
418
418
  sketch.update(0); // has too be non-empty to reach the check
419
419
  float split_points[2] = {1, 0};
420
420
  REQUIRE_THROWS_AS(sketch.get_CDF(split_points, 2), std::invalid_argument);
@@ -428,15 +428,15 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
428
428
  }
429
429
 
430
430
  SECTION("NaN split point") {
431
- kll_float_sketch sketch;
431
+ kll_float_sketch sketch(200, 0);
432
432
  sketch.update(0); // has too be non-empty to reach the check
433
433
  float split_points[1] = {std::numeric_limits<float>::quiet_NaN()};
434
434
  REQUIRE_THROWS_AS(sketch.get_CDF(split_points, 1), std::invalid_argument);
435
435
  }
436
436
 
437
437
  SECTION("merge") {
438
- kll_float_sketch sketch1;
439
- kll_float_sketch sketch2;
438
+ kll_float_sketch sketch1(200, 0);
439
+ kll_float_sketch sketch2(200, 0);
440
440
  const int n = 10000;
441
441
  for (int i = 0; i < n; i++) {
442
442
  sketch1.update(i);
@@ -458,8 +458,8 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
458
458
  }
459
459
 
460
460
  SECTION("merge lower k") {
461
- kll_float_sketch sketch1(256);
462
- kll_float_sketch sketch2(128);
461
+ kll_float_sketch sketch1(256, 0);
462
+ kll_float_sketch sketch2(128, 0);
463
463
  const int n = 10000;
464
464
  for (int i = 0; i < n; i++) {
465
465
  sketch1.update(i);
@@ -471,6 +471,9 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
471
471
  REQUIRE(sketch2.get_min_value() == n);
472
472
  REQUIRE(sketch2.get_max_value() == 2.0f * n - 1);
473
473
 
474
+ REQUIRE(sketch1.get_k() == 256);
475
+ REQUIRE(sketch2.get_k() == 128);
476
+
474
477
  REQUIRE(sketch1.get_normalized_rank_error(false) < sketch2.get_normalized_rank_error(false));
475
478
  REQUIRE(sketch1.get_normalized_rank_error(true) < sketch2.get_normalized_rank_error(true));
476
479
 
@@ -488,8 +491,8 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
488
491
  }
489
492
 
490
493
  SECTION("merge exact mode, lower k") {
491
- kll_float_sketch sketch1(256);
492
- kll_float_sketch sketch2(128);
494
+ kll_float_sketch sketch1(256, 0);
495
+ kll_float_sketch sketch2(128, 0);
493
496
  const int n = 10000;
494
497
  for (int i = 0; i < n; i++) {
495
498
  sketch1.update(i);
@@ -513,8 +516,8 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
513
516
  }
514
517
 
515
518
  SECTION("merge min value from other") {
516
- kll_float_sketch sketch1;
517
- kll_float_sketch sketch2;
519
+ kll_float_sketch sketch1(200, 0);
520
+ kll_float_sketch sketch2(200, 0);
518
521
  sketch1.update(1);
519
522
  sketch2.update(2);
520
523
  sketch2.merge(sketch1);
@@ -523,9 +526,9 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
523
526
  }
524
527
 
525
528
  SECTION("merge min and max values from other") {
526
- kll_float_sketch sketch1;
529
+ kll_float_sketch sketch1(200, 0);
527
530
  for (int i = 0; i < 1000000; i++) sketch1.update(i);
528
- kll_float_sketch sketch2;
531
+ kll_float_sketch sketch2(200, 0);
529
532
  sketch2.merge(sketch1);
530
533
  REQUIRE(sketch2.get_min_value() == 0.0f);
531
534
  REQUIRE(sketch2.get_max_value() == 999999.0f);
@@ -560,7 +563,7 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
560
563
  }
561
564
 
562
565
  SECTION("sketch of strings stream") {
563
- kll_string_sketch sketch1;
566
+ kll_string_sketch sketch1(200, 0);
564
567
  REQUIRE_THROWS_AS(sketch1.get_quantile(0), std::runtime_error);
565
568
  REQUIRE_THROWS_AS(sketch1.get_min_value(), std::runtime_error);
566
569
  REQUIRE_THROWS_AS(sketch1.get_max_value(), std::runtime_error);
@@ -575,7 +578,7 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
575
578
  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
576
579
  sketch1.serialize(s);
577
580
  REQUIRE(static_cast<size_t>(s.tellp()) == sketch1.get_serialized_size_bytes());
578
- auto sketch2 = kll_string_sketch::deserialize(s);
581
+ auto sketch2 = kll_string_sketch::deserialize(s, test_allocator<std::string>(0));
579
582
  REQUIRE(static_cast<size_t>(s.tellp()) == sketch2.get_serialized_size_bytes());
580
583
  REQUIRE(s.tellg() == s.tellp());
581
584
  REQUIRE(sketch2.is_empty() == sketch1.is_empty());
@@ -599,7 +602,7 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
599
602
  }
600
603
 
601
604
  SECTION("sketch of strings bytes") {
602
- kll_string_sketch sketch1;
605
+ kll_string_sketch sketch1(200, 0);
603
606
  REQUIRE_THROWS_AS(sketch1.get_quantile(0), std::runtime_error);
604
607
  REQUIRE_THROWS_AS(sketch1.get_min_value(), std::runtime_error);
605
608
  REQUIRE_THROWS_AS(sketch1.get_max_value(), std::runtime_error);
@@ -613,7 +616,7 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
613
616
 
614
617
  auto bytes = sketch1.serialize();
615
618
  REQUIRE(bytes.size() == sketch1.get_serialized_size_bytes());
616
- auto sketch2 = kll_string_sketch::deserialize(bytes.data(), bytes.size());
619
+ auto sketch2 = kll_string_sketch::deserialize(bytes.data(), bytes.size(), 0);
617
620
  REQUIRE(bytes.size() == sketch2.get_serialized_size_bytes());
618
621
  REQUIRE(sketch2.is_empty() == sketch1.is_empty());
619
622
  REQUIRE(sketch2.is_estimation_mode() == sketch1.is_estimation_mode());
@@ -630,11 +633,11 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
630
633
 
631
634
 
632
635
  SECTION("sketch of strings, single item, bytes") {
633
- kll_string_sketch sketch1;
636
+ kll_string_sketch sketch1(200, 0);
634
637
  sketch1.update("a");
635
638
  auto bytes = sketch1.serialize();
636
639
  REQUIRE(bytes.size() == sketch1.get_serialized_size_bytes());
637
- auto sketch2 = kll_string_sketch::deserialize(bytes.data(), bytes.size());
640
+ auto sketch2 = kll_string_sketch::deserialize(bytes.data(), bytes.size(), 0);
638
641
  REQUIRE(bytes.size() == sketch2.get_serialized_size_bytes());
639
642
  }
640
643
 
@@ -35,6 +35,7 @@ target_link_libraries(python
35
35
  fi
36
36
  theta
37
37
  sampling
38
+ req
38
39
  pybind11::module
39
40
  )
40
41
 
@@ -57,5 +58,6 @@ target_sources(python
57
58
  src/fi_wrapper.cpp
58
59
  src/theta_wrapper.cpp
59
60
  src/vo_wrapper.cpp
61
+ src/req_wrapper.cpp
60
62
  src/vector_of_kll.cpp
61
63
  )
@@ -1,4 +1,4 @@
1
- # Python Wrapper for Datasketches
1
+ # Python Wrapper for Apache DataSketches
2
2
 
3
3
  ## Installation
4
4
 
@@ -39,13 +39,16 @@ tox
39
39
 
40
40
  ## Usage
41
41
 
42
- Having installed the library, loading the Datasketches library in Python is simple: `import datasketches`.
42
+ Having installed the library, loading the Apache Datasketches library in Python is simple: `import datasketches`.
43
43
 
44
44
  ## Available Sketch Classes
45
45
 
46
- - KLL
46
+ - KLL (Absolute Error Quantiles)
47
47
  - `kll_ints_sketch`
48
48
  - `kll_floats_sketch`
49
+ - REQ (Relative Error Quantiles)
50
+ - `req_ints_sketch`
51
+ - `req_floats_sketch`
49
52
  - Frequent Items
50
53
  - `frequent_strings_sketch`
51
54
  - Error types are `frequent_items_error_type.{NO_FALSE_NEGATIVES | NO_FALSE_POSITIVES}`
@@ -27,6 +27,7 @@ void init_fi(py::module& m);
27
27
  void init_cpc(py::module& m);
28
28
  void init_theta(py::module& m);
29
29
  void init_vo(py::module& m);
30
+ void init_req(py::module& m);
30
31
  void init_vector_of_kll(py::module& m);
31
32
 
32
33
  PYBIND11_MODULE(datasketches, m) {
@@ -36,5 +37,6 @@ PYBIND11_MODULE(datasketches, m) {
36
37
  init_cpc(m);
37
38
  init_theta(m);
38
39
  init_vo(m);
40
+ init_req(m);
39
41
  init_vector_of_kll(m);
40
42
  }
@@ -113,8 +113,6 @@ void init_hll(py::module &m) {
113
113
  "Returns the approximate lower error bound given the specified number of standard deviations in {1, 2, 3}")
114
114
  .def("get_upper_bound", &hll_union::get_upper_bound, py::arg("num_std_devs"),
115
115
  "Returns the approximate upper error bound given the specified number of standard deviations in {1, 2, 3}")
116
- .def("is_compact", &hll_union::is_compact,
117
- "True if the union is compact, otherwise False")
118
116
  .def("is_empty", &hll_union::is_empty,
119
117
  "True if the union is empty, otherwise False")
120
118
  .def("reset", &hll_union::reset,
@@ -130,6 +130,8 @@ void bind_kll_sketch(py::module &m, const char* name) {
130
130
  "Produces a string summary of the sketch")
131
131
  .def("is_empty", &kll_sketch<T>::is_empty,
132
132
  "Returns True if the sketch is empty, otherwise False")
133
+ .def("get_k", &kll_sketch<T>::get_k,
134
+ "Returns the configured parameter k")
133
135
  .def("get_n", &kll_sketch<T>::get_n,
134
136
  "Returns the length of the input stream")
135
137
  .def("get_num_retained", &kll_sketch<T>::get_num_retained,
@@ -198,7 +200,7 @@ void bind_kll_sketch(py::module &m, const char* name) {
198
200
  "If pmf is True, returns the 'double-sided' normalized rank error for the get_PMF() function.\n"
199
201
  "Otherwise, it is the 'single-sided' normalized rank error for all the other queries.\n"
200
202
  "Constants were derived as the best fit to 99 percentile empirically measured max error in thousands of trials")
201
- .def("serialize", &dspy::kll_sketch_serialize<T>, "Serailizes the sketch into a bytes object")
203
+ .def("serialize", &dspy::kll_sketch_serialize<T>, "Serializes the sketch into a bytes object")
202
204
  .def_static("deserialize", &dspy::kll_sketch_deserialize<T>, "Deserializes the sketch from a bytes object")
203
205
  ;
204
206
  }