datasketches 0.3.2 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (237) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +6 -0
  3. data/NOTICE +1 -1
  4. data/README.md +0 -2
  5. data/ext/datasketches/cpc_wrapper.cpp +2 -2
  6. data/ext/datasketches/kll_wrapper.cpp +0 -10
  7. data/lib/datasketches/version.rb +1 -1
  8. data/lib/datasketches.rb +1 -1
  9. data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
  10. data/vendor/datasketches-cpp/CODE_OF_CONDUCT.md +3 -0
  11. data/vendor/datasketches-cpp/CONTRIBUTING.md +50 -0
  12. data/vendor/datasketches-cpp/Doxyfile +2827 -0
  13. data/vendor/datasketches-cpp/LICENSE +0 -76
  14. data/vendor/datasketches-cpp/NOTICE +1 -1
  15. data/vendor/datasketches-cpp/README.md +1 -3
  16. data/vendor/datasketches-cpp/common/CMakeLists.txt +12 -11
  17. data/vendor/datasketches-cpp/common/include/common_defs.hpp +11 -8
  18. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +0 -2
  19. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov.hpp +9 -6
  20. data/vendor/datasketches-cpp/common/include/optional.hpp +148 -0
  21. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view.hpp +95 -2
  22. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +1 -1
  23. data/vendor/datasketches-cpp/common/include/serde.hpp +69 -20
  24. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +1 -1
  25. data/vendor/datasketches-cpp/common/test/optional_test.cpp +85 -0
  26. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +14 -14
  27. data/vendor/datasketches-cpp/count/include/count_min.hpp +132 -78
  28. data/vendor/datasketches-cpp/count/include/count_min_impl.hpp +132 -152
  29. data/vendor/datasketches-cpp/count/test/CMakeLists.txt +11 -12
  30. data/vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp +61 -61
  31. data/vendor/datasketches-cpp/count/test/count_min_test.cpp +175 -178
  32. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +14 -20
  33. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +7 -4
  34. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +17 -17
  35. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +40 -40
  36. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +13 -10
  37. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +35 -11
  38. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +8 -8
  39. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -2
  40. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +5 -5
  41. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +20 -7
  42. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_deserialize_from_java_test.cpp +60 -0
  43. data/vendor/datasketches-cpp/{python/include/py_object_lt.hpp → cpc/test/cpc_sketch_serialize_for_java.cpp} +15 -14
  44. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +4 -29
  45. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +4 -4
  46. data/vendor/datasketches-cpp/density/include/density_sketch.hpp +29 -9
  47. data/vendor/datasketches-cpp/density/include/density_sketch_impl.hpp +1 -1
  48. data/vendor/datasketches-cpp/density/test/CMakeLists.txt +0 -1
  49. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +21 -9
  50. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +6 -4
  51. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +14 -1
  52. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_deserialize_from_java_test.cpp +95 -0
  53. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_serialize_for_java.cpp +83 -0
  54. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +3 -42
  55. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +2 -2
  56. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +3 -1
  57. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +3 -3
  58. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +5 -3
  59. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +4 -4
  60. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +3 -1
  61. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +0 -12
  62. data/vendor/datasketches-cpp/hll/include/hll.hpp +70 -57
  63. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +14 -1
  64. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +0 -68
  65. data/vendor/datasketches-cpp/hll/test/hll_sketch_deserialize_from_java_test.cpp +69 -0
  66. data/vendor/datasketches-cpp/hll/test/hll_sketch_serialize_for_java.cpp +52 -0
  67. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +2 -2
  68. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +71 -50
  69. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +59 -130
  70. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +14 -1
  71. data/vendor/datasketches-cpp/kll/test/kll_sketch_deserialize_from_java_test.cpp +103 -0
  72. data/vendor/datasketches-cpp/kll/test/kll_sketch_serialize_for_java.cpp +62 -0
  73. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +3 -38
  74. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +68 -51
  75. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +62 -132
  76. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +14 -1
  77. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_deserialize_from_java_test.cpp +84 -0
  78. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_serialize_for_java.cpp +52 -0
  79. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +14 -38
  80. data/vendor/datasketches-cpp/req/include/req_common.hpp +7 -3
  81. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +2 -2
  82. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +97 -23
  83. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +48 -109
  84. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +14 -1
  85. data/vendor/datasketches-cpp/req/test/req_sketch_deserialize_from_java_test.cpp +55 -0
  86. data/vendor/datasketches-cpp/{tuple/include/array_of_doubles_intersection_impl.hpp → req/test/req_sketch_serialize_for_java.cpp} +12 -7
  87. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +3 -89
  88. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +4 -0
  89. data/vendor/datasketches-cpp/sampling/include/ebpps_sample.hpp +210 -0
  90. data/vendor/datasketches-cpp/sampling/include/ebpps_sample_impl.hpp +535 -0
  91. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch.hpp +281 -0
  92. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch_impl.hpp +531 -0
  93. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +69 -26
  94. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +3 -3
  95. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +10 -11
  96. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +4 -4
  97. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +55 -8
  98. data/vendor/datasketches-cpp/sampling/test/ebpps_allocation_test.cpp +96 -0
  99. data/vendor/datasketches-cpp/sampling/test/ebpps_sample_test.cpp +137 -0
  100. data/vendor/datasketches-cpp/sampling/test/ebpps_sketch_test.cpp +266 -0
  101. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_deserialize_from_java_test.cpp +81 -0
  102. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_serialize_for_java.cpp +54 -0
  103. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +0 -37
  104. data/vendor/datasketches-cpp/sampling/test/var_opt_union_deserialize_from_java_test.cpp +50 -0
  105. data/vendor/datasketches-cpp/sampling/test/var_opt_union_serialize_for_java.cpp +56 -0
  106. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -18
  107. data/vendor/datasketches-cpp/theta/include/bit_packing.hpp +2608 -2608
  108. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
  109. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_theta_sketched_sets.hpp +7 -6
  110. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +20 -5
  111. data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +10 -4
  112. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +1 -1
  113. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +13 -5
  114. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +5 -5
  115. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +3 -3
  116. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity.hpp +2 -1
  117. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +1 -0
  118. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +1 -1
  119. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +126 -27
  120. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +8 -8
  121. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +17 -10
  122. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -1
  123. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +3 -3
  124. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +5 -2
  125. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +11 -1
  126. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +14 -1
  127. data/vendor/datasketches-cpp/theta/test/theta_sketch_deserialize_from_java_test.cpp +57 -0
  128. data/vendor/datasketches-cpp/theta/test/theta_sketch_serialize_for_java.cpp +61 -0
  129. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +0 -188
  130. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +8 -7
  131. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +19 -144
  132. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b.hpp → array_tuple_a_not_b.hpp} +24 -16
  133. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b_impl.hpp → array_tuple_a_not_b_impl.hpp} +4 -4
  134. data/vendor/datasketches-cpp/tuple/include/array_tuple_intersection.hpp +65 -0
  135. data/vendor/datasketches-cpp/{python/include/py_object_ostream.hpp → tuple/include/array_tuple_intersection_impl.hpp} +7 -24
  136. data/vendor/datasketches-cpp/tuple/include/array_tuple_sketch.hpp +237 -0
  137. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_sketch_impl.hpp → array_tuple_sketch_impl.hpp} +40 -41
  138. data/vendor/datasketches-cpp/tuple/include/array_tuple_union.hpp +81 -0
  139. data/vendor/datasketches-cpp/tuple/include/array_tuple_union_impl.hpp +43 -0
  140. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +11 -2
  141. data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +17 -10
  142. data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +2 -1
  143. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +95 -32
  144. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +19 -11
  145. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +16 -1
  146. data/vendor/datasketches-cpp/tuple/test/aod_sketch_deserialize_from_java_test.cpp +76 -0
  147. data/vendor/datasketches-cpp/tuple/test/aod_sketch_serialize_for_java.cpp +62 -0
  148. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +5 -129
  149. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +85 -89
  150. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +3 -1
  151. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_deserialize_from_java_test.cpp +47 -0
  152. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_serialize_for_java.cpp +38 -0
  153. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +1 -1
  154. data/vendor/datasketches-cpp/version.cfg.in +1 -1
  155. metadata +47 -93
  156. data/vendor/datasketches-cpp/MANIFEST.in +0 -39
  157. data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
  158. data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
  159. data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
  160. data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
  161. data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
  162. data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
  163. data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
  164. data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
  165. data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
  166. data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
  167. data/vendor/datasketches-cpp/pyproject.toml +0 -23
  168. data/vendor/datasketches-cpp/python/CMakeLists.txt +0 -87
  169. data/vendor/datasketches-cpp/python/README.md +0 -85
  170. data/vendor/datasketches-cpp/python/datasketches/DensityWrapper.py +0 -87
  171. data/vendor/datasketches-cpp/python/datasketches/KernelFunction.py +0 -35
  172. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +0 -110
  173. data/vendor/datasketches-cpp/python/datasketches/TuplePolicy.py +0 -77
  174. data/vendor/datasketches-cpp/python/datasketches/TupleWrapper.py +0 -205
  175. data/vendor/datasketches-cpp/python/datasketches/__init__.py +0 -38
  176. data/vendor/datasketches-cpp/python/include/kernel_function.hpp +0 -98
  177. data/vendor/datasketches-cpp/python/include/py_serde.hpp +0 -113
  178. data/vendor/datasketches-cpp/python/include/quantile_conditional.hpp +0 -104
  179. data/vendor/datasketches-cpp/python/include/tuple_policy.hpp +0 -136
  180. data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +0 -345
  181. data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +0 -354
  182. data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +0 -346
  183. data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +0 -463
  184. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +0 -403
  185. data/vendor/datasketches-cpp/python/pybind11Path.cmd +0 -21
  186. data/vendor/datasketches-cpp/python/src/__init__.py +0 -18
  187. data/vendor/datasketches-cpp/python/src/count_wrapper.cpp +0 -101
  188. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +0 -76
  189. data/vendor/datasketches-cpp/python/src/datasketches.cpp +0 -58
  190. data/vendor/datasketches-cpp/python/src/density_wrapper.cpp +0 -95
  191. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +0 -182
  192. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -126
  193. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +0 -158
  194. data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +0 -68
  195. data/vendor/datasketches-cpp/python/src/py_serde.cpp +0 -112
  196. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +0 -155
  197. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +0 -154
  198. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +0 -166
  199. data/vendor/datasketches-cpp/python/src/tuple_wrapper.cpp +0 -215
  200. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +0 -490
  201. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +0 -173
  202. data/vendor/datasketches-cpp/python/tests/__init__.py +0 -16
  203. data/vendor/datasketches-cpp/python/tests/count_min_test.py +0 -86
  204. data/vendor/datasketches-cpp/python/tests/cpc_test.py +0 -64
  205. data/vendor/datasketches-cpp/python/tests/density_test.py +0 -93
  206. data/vendor/datasketches-cpp/python/tests/fi_test.py +0 -149
  207. data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -129
  208. data/vendor/datasketches-cpp/python/tests/kll_test.py +0 -159
  209. data/vendor/datasketches-cpp/python/tests/quantiles_test.py +0 -160
  210. data/vendor/datasketches-cpp/python/tests/req_test.py +0 -159
  211. data/vendor/datasketches-cpp/python/tests/theta_test.py +0 -148
  212. data/vendor/datasketches-cpp/python/tests/tuple_test.py +0 -206
  213. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +0 -148
  214. data/vendor/datasketches-cpp/python/tests/vo_test.py +0 -132
  215. data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
  216. data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
  217. data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
  218. data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
  219. data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
  220. data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +0 -67
  221. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
  222. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
  223. data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
  224. data/vendor/datasketches-cpp/setup.py +0 -110
  225. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
  226. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
  227. data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
  228. data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
  229. data/vendor/datasketches-cpp/tox.ini +0 -26
  230. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +0 -52
  231. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +0 -81
  232. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +0 -43
  233. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +0 -1
  234. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
  235. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
  236. data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
  237. data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +0 -1
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datasketches
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-05-03 00:00:00.000000000 Z
11
+ date: 2023-11-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rice
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: 4.0.2
19
+ version: '4.1'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: 4.0.2
26
+ version: '4.1'
27
27
  description:
28
28
  email: andrew@ankane.org
29
29
  executables: []
@@ -47,8 +47,10 @@ files:
47
47
  - lib/datasketches.rb
48
48
  - lib/datasketches/version.rb
49
49
  - vendor/datasketches-cpp/CMakeLists.txt
50
+ - vendor/datasketches-cpp/CODE_OF_CONDUCT.md
51
+ - vendor/datasketches-cpp/CONTRIBUTING.md
52
+ - vendor/datasketches-cpp/Doxyfile
50
53
  - vendor/datasketches-cpp/LICENSE
51
- - vendor/datasketches-cpp/MANIFEST.in
52
54
  - vendor/datasketches-cpp/NOTICE
53
55
  - vendor/datasketches-cpp/README.md
54
56
  - vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in
@@ -65,6 +67,7 @@ files:
65
67
  - vendor/datasketches-cpp/common/include/kolmogorov_smirnov.hpp
66
68
  - vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp
67
69
  - vendor/datasketches-cpp/common/include/memory_operations.hpp
70
+ - vendor/datasketches-cpp/common/include/optional.hpp
68
71
  - vendor/datasketches-cpp/common/include/quantiles_sorted_view.hpp
69
72
  - vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp
70
73
  - vendor/datasketches-cpp/common/include/serde.hpp
@@ -72,6 +75,7 @@ files:
72
75
  - vendor/datasketches-cpp/common/test/CMakeLists.txt
73
76
  - vendor/datasketches-cpp/common/test/catch_runner.cpp
74
77
  - vendor/datasketches-cpp/common/test/integration_test.cpp
78
+ - vendor/datasketches-cpp/common/test/optional_test.cpp
75
79
  - vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp
76
80
  - vendor/datasketches-cpp/common/test/test_allocator.cpp
77
81
  - vendor/datasketches-cpp/common/test/test_allocator.hpp
@@ -100,6 +104,8 @@ files:
100
104
  - vendor/datasketches-cpp/cpc/test/CMakeLists.txt
101
105
  - vendor/datasketches-cpp/cpc/test/compression_test.cpp
102
106
  - vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp
107
+ - vendor/datasketches-cpp/cpc/test/cpc_sketch_deserialize_from_java_test.cpp
108
+ - vendor/datasketches-cpp/cpc/test/cpc_sketch_serialize_for_java.cpp
103
109
  - vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp
104
110
  - vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp
105
111
  - vendor/datasketches-cpp/density/CMakeLists.txt
@@ -114,10 +120,9 @@ files:
114
120
  - vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp
115
121
  - vendor/datasketches-cpp/fi/test/CMakeLists.txt
116
122
  - vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp
123
+ - vendor/datasketches-cpp/fi/test/frequent_items_sketch_deserialize_from_java_test.cpp
124
+ - vendor/datasketches-cpp/fi/test/frequent_items_sketch_serialize_for_java.cpp
117
125
  - vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp
118
- - vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk
119
- - vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk
120
- - vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk
121
126
  - vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp
122
127
  - vendor/datasketches-cpp/hll/CMakeLists.txt
123
128
  - vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp
@@ -163,12 +168,8 @@ files:
163
168
  - vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp
164
169
  - vendor/datasketches-cpp/hll/test/TablesTest.cpp
165
170
  - vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp
166
- - vendor/datasketches-cpp/hll/test/array6_from_java.sk
167
- - vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk
168
- - vendor/datasketches-cpp/hll/test/compact_set_from_java.sk
169
- - vendor/datasketches-cpp/hll/test/list_from_java.sk
170
- - vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk
171
- - vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk
171
+ - vendor/datasketches-cpp/hll/test/hll_sketch_deserialize_from_java_test.cpp
172
+ - vendor/datasketches-cpp/hll/test/hll_sketch_serialize_for_java.cpp
172
173
  - vendor/datasketches-cpp/kll/CMakeLists.txt
173
174
  - vendor/datasketches-cpp/kll/include/kll_helper.hpp
174
175
  - vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp
@@ -176,61 +177,12 @@ files:
176
177
  - vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp
177
178
  - vendor/datasketches-cpp/kll/test/CMakeLists.txt
178
179
  - vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp
180
+ - vendor/datasketches-cpp/kll/test/kll_sketch_deserialize_from_java_test.cpp
179
181
  - vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk
180
- - vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk
182
+ - vendor/datasketches-cpp/kll/test/kll_sketch_serialize_for_java.cpp
181
183
  - vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp
182
184
  - vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp
183
185
  - vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp
184
- - vendor/datasketches-cpp/pyproject.toml
185
- - vendor/datasketches-cpp/python/CMakeLists.txt
186
- - vendor/datasketches-cpp/python/README.md
187
- - vendor/datasketches-cpp/python/datasketches/DensityWrapper.py
188
- - vendor/datasketches-cpp/python/datasketches/KernelFunction.py
189
- - vendor/datasketches-cpp/python/datasketches/PySerDe.py
190
- - vendor/datasketches-cpp/python/datasketches/TuplePolicy.py
191
- - vendor/datasketches-cpp/python/datasketches/TupleWrapper.py
192
- - vendor/datasketches-cpp/python/datasketches/__init__.py
193
- - vendor/datasketches-cpp/python/include/kernel_function.hpp
194
- - vendor/datasketches-cpp/python/include/py_object_lt.hpp
195
- - vendor/datasketches-cpp/python/include/py_object_ostream.hpp
196
- - vendor/datasketches-cpp/python/include/py_serde.hpp
197
- - vendor/datasketches-cpp/python/include/quantile_conditional.hpp
198
- - vendor/datasketches-cpp/python/include/tuple_policy.hpp
199
- - vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb
200
- - vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb
201
- - vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb
202
- - vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb
203
- - vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb
204
- - vendor/datasketches-cpp/python/pybind11Path.cmd
205
- - vendor/datasketches-cpp/python/src/__init__.py
206
- - vendor/datasketches-cpp/python/src/count_wrapper.cpp
207
- - vendor/datasketches-cpp/python/src/cpc_wrapper.cpp
208
- - vendor/datasketches-cpp/python/src/datasketches.cpp
209
- - vendor/datasketches-cpp/python/src/density_wrapper.cpp
210
- - vendor/datasketches-cpp/python/src/fi_wrapper.cpp
211
- - vendor/datasketches-cpp/python/src/hll_wrapper.cpp
212
- - vendor/datasketches-cpp/python/src/kll_wrapper.cpp
213
- - vendor/datasketches-cpp/python/src/ks_wrapper.cpp
214
- - vendor/datasketches-cpp/python/src/py_serde.cpp
215
- - vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp
216
- - vendor/datasketches-cpp/python/src/req_wrapper.cpp
217
- - vendor/datasketches-cpp/python/src/theta_wrapper.cpp
218
- - vendor/datasketches-cpp/python/src/tuple_wrapper.cpp
219
- - vendor/datasketches-cpp/python/src/vector_of_kll.cpp
220
- - vendor/datasketches-cpp/python/src/vo_wrapper.cpp
221
- - vendor/datasketches-cpp/python/tests/__init__.py
222
- - vendor/datasketches-cpp/python/tests/count_min_test.py
223
- - vendor/datasketches-cpp/python/tests/cpc_test.py
224
- - vendor/datasketches-cpp/python/tests/density_test.py
225
- - vendor/datasketches-cpp/python/tests/fi_test.py
226
- - vendor/datasketches-cpp/python/tests/hll_test.py
227
- - vendor/datasketches-cpp/python/tests/kll_test.py
228
- - vendor/datasketches-cpp/python/tests/quantiles_test.py
229
- - vendor/datasketches-cpp/python/tests/req_test.py
230
- - vendor/datasketches-cpp/python/tests/theta_test.py
231
- - vendor/datasketches-cpp/python/tests/tuple_test.py
232
- - vendor/datasketches-cpp/python/tests/vector_of_kll_test.py
233
- - vendor/datasketches-cpp/python/tests/vo_test.py
234
186
  - vendor/datasketches-cpp/quantiles/CMakeLists.txt
235
187
  - vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp
236
188
  - vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp
@@ -245,6 +197,8 @@ files:
245
197
  - vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.3.sk
246
198
  - vendor/datasketches-cpp/quantiles/test/kolmogorov_smirnov_test.cpp
247
199
  - vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp
200
+ - vendor/datasketches-cpp/quantiles/test/quantiles_sketch_deserialize_from_java_test.cpp
201
+ - vendor/datasketches-cpp/quantiles/test/quantiles_sketch_serialize_for_java.cpp
248
202
  - vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp
249
203
  - vendor/datasketches-cpp/req/CMakeLists.txt
250
204
  - vendor/datasketches-cpp/req/include/req_common.hpp
@@ -253,27 +207,30 @@ files:
253
207
  - vendor/datasketches-cpp/req/include/req_sketch.hpp
254
208
  - vendor/datasketches-cpp/req/include/req_sketch_impl.hpp
255
209
  - vendor/datasketches-cpp/req/test/CMakeLists.txt
256
- - vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk
257
- - vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk
258
- - vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk
259
- - vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk
260
- - vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk
261
210
  - vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp
211
+ - vendor/datasketches-cpp/req/test/req_sketch_deserialize_from_java_test.cpp
212
+ - vendor/datasketches-cpp/req/test/req_sketch_serialize_for_java.cpp
262
213
  - vendor/datasketches-cpp/req/test/req_sketch_test.cpp
263
214
  - vendor/datasketches-cpp/sampling/CMakeLists.txt
215
+ - vendor/datasketches-cpp/sampling/include/ebpps_sample.hpp
216
+ - vendor/datasketches-cpp/sampling/include/ebpps_sample_impl.hpp
217
+ - vendor/datasketches-cpp/sampling/include/ebpps_sketch.hpp
218
+ - vendor/datasketches-cpp/sampling/include/ebpps_sketch_impl.hpp
264
219
  - vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp
265
220
  - vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp
266
221
  - vendor/datasketches-cpp/sampling/include/var_opt_union.hpp
267
222
  - vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp
268
223
  - vendor/datasketches-cpp/sampling/test/CMakeLists.txt
269
- - vendor/datasketches-cpp/sampling/test/binaries_from_java.txt
224
+ - vendor/datasketches-cpp/sampling/test/ebpps_allocation_test.cpp
225
+ - vendor/datasketches-cpp/sampling/test/ebpps_sample_test.cpp
226
+ - vendor/datasketches-cpp/sampling/test/ebpps_sketch_test.cpp
270
227
  - vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp
228
+ - vendor/datasketches-cpp/sampling/test/var_opt_sketch_deserialize_from_java_test.cpp
229
+ - vendor/datasketches-cpp/sampling/test/var_opt_sketch_serialize_for_java.cpp
271
230
  - vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp
231
+ - vendor/datasketches-cpp/sampling/test/var_opt_union_deserialize_from_java_test.cpp
232
+ - vendor/datasketches-cpp/sampling/test/var_opt_union_serialize_for_java.cpp
272
233
  - vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp
273
- - vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk
274
- - vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk
275
- - vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk
276
- - vendor/datasketches-cpp/setup.py
277
234
  - vendor/datasketches-cpp/theta/CMakeLists.txt
278
235
  - vendor/datasketches-cpp/theta/include/bit_packing.hpp
279
236
  - vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp
@@ -304,29 +261,27 @@ files:
304
261
  - vendor/datasketches-cpp/theta/test/CMakeLists.txt
305
262
  - vendor/datasketches-cpp/theta/test/bit_packing_test.cpp
306
263
  - vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp
307
- - vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk
308
264
  - vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk
309
265
  - vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk
310
- - vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk
311
266
  - vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk
312
267
  - vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk
313
- - vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk
314
- - vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk
315
268
  - vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp
316
269
  - vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp
317
270
  - vendor/datasketches-cpp/theta/test/theta_setop_test.cpp
271
+ - vendor/datasketches-cpp/theta/test/theta_sketch_deserialize_from_java_test.cpp
272
+ - vendor/datasketches-cpp/theta/test/theta_sketch_serialize_for_java.cpp
318
273
  - vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp
319
274
  - vendor/datasketches-cpp/theta/test/theta_union_test.cpp
320
- - vendor/datasketches-cpp/tox.ini
321
275
  - vendor/datasketches-cpp/tuple/CMakeLists.txt
322
- - vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp
323
- - vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp
324
- - vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp
325
- - vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection_impl.hpp
326
276
  - vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp
327
- - vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp
328
- - vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp
329
- - vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp
277
+ - vendor/datasketches-cpp/tuple/include/array_tuple_a_not_b.hpp
278
+ - vendor/datasketches-cpp/tuple/include/array_tuple_a_not_b_impl.hpp
279
+ - vendor/datasketches-cpp/tuple/include/array_tuple_intersection.hpp
280
+ - vendor/datasketches-cpp/tuple/include/array_tuple_intersection_impl.hpp
281
+ - vendor/datasketches-cpp/tuple/include/array_tuple_sketch.hpp
282
+ - vendor/datasketches-cpp/tuple/include/array_tuple_sketch_impl.hpp
283
+ - vendor/datasketches-cpp/tuple/include/array_tuple_union.hpp
284
+ - vendor/datasketches-cpp/tuple/include/array_tuple_union_impl.hpp
330
285
  - vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp
331
286
  - vendor/datasketches-cpp/tuple/include/tuple_a_not_b_impl.hpp
332
287
  - vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp
@@ -337,17 +292,16 @@ files:
337
292
  - vendor/datasketches-cpp/tuple/include/tuple_union.hpp
338
293
  - vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp
339
294
  - vendor/datasketches-cpp/tuple/test/CMakeLists.txt
340
- - vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk
341
- - vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk
342
- - vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk
343
- - vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk
344
- - vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk
295
+ - vendor/datasketches-cpp/tuple/test/aod_sketch_deserialize_from_java_test.cpp
296
+ - vendor/datasketches-cpp/tuple/test/aod_sketch_serialize_for_java.cpp
345
297
  - vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp
346
298
  - vendor/datasketches-cpp/tuple/test/engagement_test.cpp
347
299
  - vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp
348
300
  - vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp
349
301
  - vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp
350
302
  - vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp
303
+ - vendor/datasketches-cpp/tuple/test/tuple_sketch_deserialize_from_java_test.cpp
304
+ - vendor/datasketches-cpp/tuple/test/tuple_sketch_serialize_for_java.cpp
351
305
  - vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp
352
306
  - vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp
353
307
  - vendor/datasketches-cpp/version.cfg.in
@@ -363,7 +317,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
363
317
  requirements:
364
318
  - - ">="
365
319
  - !ruby/object:Gem::Version
366
- version: '2.7'
320
+ version: '3'
367
321
  required_rubygems_version: !ruby/object:Gem::Requirement
368
322
  requirements:
369
323
  - - ">="
@@ -1,39 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- global-include CMakeLists.txt
19
- global-include *.cpp
20
- global-include *.c
21
- global-include *.hpp
22
- global-include *.h
23
- global-include *.bin
24
- global-include *.in
25
-
26
- graft cmake
27
- graft common
28
- graft cpc
29
- graft fi
30
- graft hll
31
- graft kll
32
- graft req
33
- graft theta
34
- graft tuple
35
- graft sampling
36
- graft python
37
-
38
- # exclusions appear after including subdirectories
39
- prune build
@@ -1,23 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- [build-system]
19
- requires = ["wheel",
20
- "setuptools >= 30.3.0",
21
- "cmake >= 3.16",
22
- "pybind11[global] >= 2.6.0"]
23
- build-backend = "setuptools.build_meta"
@@ -1,87 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.18.0")
19
- find_package(Python3 COMPONENTS Interpreter Development.Module REQUIRED)
20
- else()
21
- find_package(Python3 COMPONENTS Interpreter Development REQUIRED)
22
- endif()
23
-
24
- # only Windows+MSVC seems to have trouble locating pybind11
25
- if (MSVC)
26
- execute_process(COMMAND cmd.exe /c ${CMAKE_CURRENT_SOURCE_DIR}/pybind11Path.cmd "${Python3_EXECUTABLE}"
27
- WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
28
- OUTPUT_STRIP_TRAILING_WHITESPACE
29
- OUTPUT_VARIABLE EXTRA_PACKAGE_PATH)
30
- set(CMAKE_PREFIX_PATH ${CMAKE_PREFIX_PATH} ${EXTRA_PACKAGE_PATH})
31
- endif()
32
-
33
- find_package(pybind11 CONFIG REQUIRED)
34
-
35
- pybind11_add_module(python MODULE EXCLUDE_FROM_ALL THIN_LTO)
36
-
37
- target_link_libraries(python
38
- PRIVATE
39
- common
40
- hll
41
- kll
42
- cpc
43
- fi
44
- theta
45
- tuple
46
- sampling
47
- req
48
- quantiles
49
- count
50
- density
51
- pybind11::module
52
- )
53
-
54
- set_target_properties(python PROPERTIES
55
- PREFIX ""
56
- OUTPUT_NAME _datasketches
57
- )
58
-
59
- target_include_directories(python
60
- PUBLIC
61
- $<INSTALL_INTERFACE:$<INSTALL_PREFIX>/include>
62
- $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
63
- )
64
-
65
- # ensure we make a .so on Mac rather than .dylib
66
- if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
67
- set_target_properties(python PROPERTIES SUFFIX ".so")
68
- endif()
69
-
70
- target_sources(python
71
- PRIVATE
72
- src/datasketches.cpp
73
- src/hll_wrapper.cpp
74
- src/kll_wrapper.cpp
75
- src/cpc_wrapper.cpp
76
- src/fi_wrapper.cpp
77
- src/theta_wrapper.cpp
78
- src/tuple_wrapper.cpp
79
- src/vo_wrapper.cpp
80
- src/req_wrapper.cpp
81
- src/quantiles_wrapper.cpp
82
- src/density_wrapper.cpp
83
- src/ks_wrapper.cpp
84
- src/count_wrapper.cpp
85
- src/vector_of_kll.cpp
86
- src/py_serde.cpp
87
- )
@@ -1,85 +0,0 @@
1
- <img src="https://raw.githubusercontent.com/apache/datasketches-website/master/logos/svg/datasketches-HorizontalColor-TM.svg" width="75%" alt="Apache DataSketches Logo">
2
-
3
- # The Apache DataSketches Library for Python
4
-
5
- This is the official version of the [Apache DataSketches](https://datasketches.apache.org) Python library.
6
-
7
- In the analysis of big data there are often problem queries that don’t scale because they require huge compute resources and time to generate exact results. Examples include count distinct, quantiles, most-frequent items, joins, matrix computations, and graph analysis.
8
-
9
- If approximate results are acceptable, there is a class of specialized algorithms, called streaming algorithms, or sketches, that can produce results orders of magnitude faster and with mathematically proven error bounds. For interactive queries there may not be other viable alternatives, and in the case of real-time analysis, sketches are the only known solution.
10
-
11
- This package provides a variety of sketches as described below. Wherever a specific type of sketch exists in Apache DataSketches packages for other languages, the sketches will be portable between languages (for platforms with the same endianness).
12
-
13
- ## Building and Installation
14
-
15
- Once cloned, the library can be installed by running `python3 -m pip install .` in the project root directory -- not the python subdirectory -- which will also install the necessary dependencies, namely NumPy and [pybind11[global]](https://github.com/pybind/pybind11).
16
-
17
- If you prefer to call the `setup.py` build script directly, which is discouraged, you must first install `pybind11[global]`, as well as any other dependencies listed under the build-system section in `pyproject.toml`.
18
-
19
- The library is also available from PyPI via `python3 -m pip install datasketches`.
20
-
21
- ## Usage
22
-
23
- Having installed the library, loading the Apache DataSketches Library in Python is simple: `import datasketches`.
24
-
25
- The unit tests are mostly structured in a tutorial style and can be used as a reference example for how to feed data into and query the different types of sketches.
26
-
27
- ## Available Sketch Classes
28
-
29
- - KLL (Absolute Error Quantiles)
30
- - `kll_ints_sketch`
31
- - `kll_floats_sketch`
32
- - `kll_doubles_sketch`
33
- - Quantiles (Absolute Error Quantiles, inferior algorithm)
34
- - `quantiles_ints_sketch`
35
- - `quantiles_floats_sketch`
36
- - `quantiles_doubles_sketch`
37
- - REQ (Relative Error Quantiles)
38
- - `req_ints_sketch`
39
- - `req_floats_sketch`
40
- - Frequent Items
41
- - `frequent_strings_sketch`
42
- - Error types are `frequent_items_error_type.{NO_FALSE_NEGATIVES | NO_FALSE_POSITIVES}`
43
- - Theta
44
- - `update_theta_sketch`
45
- - `compact_theta_sketch` (cannot be instantiated directly)
46
- - `theta_union`
47
- - `theta_intersection`
48
- - `theta_a_not_b`
49
- - HLL
50
- - `hll_sketch`
51
- - `hll_union`
52
- - Target HLL types are `tgt_hll_type.{HLL_4 | HLL_6 | HLL_8}`
53
- - CPC
54
- - `cpc_sketch`
55
- - `cpc_union`
56
- - VarOpt Sampling
57
- - `var_opt_sketch`
58
- - `var_opt_union`
59
- - Vector of KLL
60
- - `vector_of_kll_ints_sketches`
61
- - `vector_of_kll_floats_sketches`
62
- - Kolmogorov-Smirnov Test
63
- - `ks_test` applied to a pair of matched-type Absolute Error quantiles sketches
64
-
65
- ## Known Differences from C++
66
-
67
- The Python API largely mirrors the C++ API, with a few minor exceptions: The primary known differences are that Python on modern platforms does not support unsigned integer values or numeric values with fewer than 64 bits. As a result, you may not be able to produce identical sketches from within Python as you can with Java and C++. Loading those sketches after they have been serialized from another language will work as expected.
68
-
69
- The Vector of KLL object is currently exclusive to Python, and holds an array of independent KLL sketches. This is useful for creating a set of KLL sketches over a vector and has been designed to allow input as either a vector or a matrix of multiple vectors.
70
-
71
- We have also removed reliance on a builder class for theta sketches as Python allows named arguments to the constructor, not strictly positional arguments.
72
-
73
- ## Developer Instructions
74
-
75
- The only developer-specific instructions relate to running unit tests.
76
-
77
- ### Unit tests
78
-
79
- The Python unit tests are run via `tox`, with no arguments, from the project root directory -- not the python subdirectory. Tox creates a temporary virtual environment in which to build and run the unit tests. In the event you are missing the necessary package, tox may be installed with `python3 -m pip install --upgrade tox`.
80
-
81
- ## License
82
-
83
- The Apache DataSketches Library is distributed under the Apache 2.0 License.
84
-
85
- Precompiled binaries may be provided as a convenience and distributed through PyPI via [https://pypi.org/project/datasketches/](https://pypi.org/project/datasketches/); these binaries contain compiled code from [pybind11](https://github.com/pybind/pybind11), which is distributed under a BSD license.
@@ -1,87 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- import numpy as np
19
-
20
- from _datasketches import _density_sketch, KernelFunction
21
- from .KernelFunction import GaussianKernel
22
-
23
- class density_sketch:
24
- """An instance of a Density Sketch for kernel density estimation. Requires a KernelFunction object."""
25
-
26
- def __init__(self, k:int, dim:int, kernel:KernelFunction=GaussianKernel()):
27
- self._kernel = kernel
28
- self._gadget = _density_sketch(k, dim, self._kernel)
29
-
30
- @classmethod
31
- def deserialize(cls, data:bytes, kernel:KernelFunction=GaussianKernel()):
32
- """Reads a bytes object and returns a density sketch, using the provided kernel or defaulting to a Gaussian kernel"""
33
- self = cls.__new__(cls)
34
- self._kernel = kernel
35
- self._gadget = _density_sketch.deserialize(data, kernel)
36
- return self
37
-
38
- def update(self, point:np.array):
39
- """Updates the sketch with the given point"""
40
- self._gadget.update(point)
41
-
42
- def merge(self, other:'density_sketch'):
43
- """Merges the provided sketch into this one"""
44
- self._gadget.merge(other._gadget)
45
-
46
- def is_empty(self):
47
- """Returns True if the sketch is empty, otherwise False"""
48
- return self._gadget.is_empty()
49
-
50
- def get_k(self):
51
- """Returns the configured parameter k"""
52
- return self._gadget.get_k()
53
-
54
- def get_dim(self):
55
- """Returns the configured parameter dim"""
56
- return self._gadget.get_dim()
57
-
58
- def get_n(self):
59
- """Returns the length of the input stream"""
60
- return self._gadget.get_n()
61
-
62
- def get_num_retained(self):
63
- """Returns the number of retained items (samples) in the sketch"""
64
- return self._gadget.get_num_retained()
65
-
66
- def is_estimation_mode(self):
67
- """Returns True if the sketch is in estimation mode, otherwise False"""
68
- return self._gadget.is_estimation_mode()
69
-
70
- def get_estimate(self, point:np.array):
71
- """Returns an approximate density at the given point"""
72
- return self._gadget.get_estimate(point)
73
-
74
- def serialize(self):
75
- """Serializes the sketch into a bytes object"""
76
- return self._gadget.serialize()
77
-
78
- def __str__(self, print_levels:bool=False, print_items:bool=False):
79
- """Produces a string summary of the sketch"""
80
- return self._gadget.to_string(print_levels, print_items)
81
-
82
- def to_string(self, print_levels:bool=False, print_items:bool=False):
83
- """Produces a string summary of the sketch"""
84
- return self._gadget.to_string(print_levels, print_items)
85
-
86
- def __iter__(self):
87
- return self._gadget.__iter__()