datasketches 0.3.1 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (245) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/NOTICE +1 -1
  4. data/README.md +0 -2
  5. data/ext/datasketches/cpc_wrapper.cpp +3 -3
  6. data/ext/datasketches/kll_wrapper.cpp +0 -10
  7. data/lib/datasketches/version.rb +1 -1
  8. data/lib/datasketches.rb +1 -1
  9. data/vendor/datasketches-cpp/CMakeLists.txt +23 -20
  10. data/vendor/datasketches-cpp/CODE_OF_CONDUCT.md +3 -0
  11. data/vendor/datasketches-cpp/CONTRIBUTING.md +50 -0
  12. data/vendor/datasketches-cpp/Doxyfile +2827 -0
  13. data/vendor/datasketches-cpp/LICENSE +0 -76
  14. data/vendor/datasketches-cpp/README.md +1 -3
  15. data/vendor/datasketches-cpp/common/CMakeLists.txt +12 -11
  16. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +25 -27
  17. data/vendor/datasketches-cpp/common/include/common_defs.hpp +15 -10
  18. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +11 -2
  19. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov.hpp +9 -6
  20. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +5 -4
  21. data/vendor/datasketches-cpp/common/include/optional.hpp +148 -0
  22. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view.hpp +95 -2
  23. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +1 -1
  24. data/vendor/datasketches-cpp/common/include/serde.hpp +69 -20
  25. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +2 -2
  26. data/vendor/datasketches-cpp/common/test/integration_test.cpp +6 -0
  27. data/vendor/datasketches-cpp/common/test/optional_test.cpp +85 -0
  28. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +14 -14
  29. data/vendor/datasketches-cpp/{python/src/__init__.py → count/CMakeLists.txt} +25 -1
  30. data/vendor/datasketches-cpp/count/include/count_min.hpp +405 -0
  31. data/vendor/datasketches-cpp/count/include/count_min_impl.hpp +497 -0
  32. data/vendor/datasketches-cpp/{MANIFEST.in → count/test/CMakeLists.txt} +23 -20
  33. data/vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp +155 -0
  34. data/vendor/datasketches-cpp/count/test/count_min_test.cpp +303 -0
  35. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +14 -20
  36. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +7 -4
  37. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +17 -17
  38. data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +3 -3
  39. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +40 -40
  40. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +14 -11
  41. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +35 -11
  42. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +8 -8
  43. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +16 -8
  44. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -2
  45. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +5 -5
  46. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +20 -7
  47. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_deserialize_from_java_test.cpp +60 -0
  48. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_serialize_for_java.cpp +38 -0
  49. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +4 -29
  50. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +4 -4
  51. data/vendor/datasketches-cpp/{tox.ini → density/CMakeLists.txt} +24 -8
  52. data/vendor/datasketches-cpp/density/include/density_sketch.hpp +256 -0
  53. data/vendor/datasketches-cpp/density/include/density_sketch_impl.hpp +543 -0
  54. data/vendor/datasketches-cpp/{python/datasketches/__init__.py → density/test/CMakeLists.txt} +15 -3
  55. data/vendor/datasketches-cpp/density/test/density_sketch_test.cpp +244 -0
  56. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +21 -9
  57. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +6 -4
  58. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +9 -3
  59. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +14 -1
  60. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_deserialize_from_java_test.cpp +95 -0
  61. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_serialize_for_java.cpp +83 -0
  62. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +3 -42
  63. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +2 -2
  64. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +3 -1
  65. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +19 -11
  66. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +2 -5
  67. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +19 -7
  68. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +1 -1
  69. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +98 -42
  70. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +2 -0
  71. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +94 -61
  72. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +20 -8
  73. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +4 -4
  74. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +3 -1
  75. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +3 -21
  76. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +8 -0
  77. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +14 -18
  78. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +1 -1
  79. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +8 -2
  80. data/vendor/datasketches-cpp/hll/include/hll.hpp +79 -65
  81. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +14 -1
  82. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +7 -1
  83. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +0 -68
  84. data/vendor/datasketches-cpp/hll/test/hll_sketch_deserialize_from_java_test.cpp +69 -0
  85. data/vendor/datasketches-cpp/hll/test/hll_sketch_serialize_for_java.cpp +52 -0
  86. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -1
  87. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +2 -2
  88. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +79 -53
  89. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +61 -132
  90. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +14 -1
  91. data/vendor/datasketches-cpp/kll/test/kll_sketch_deserialize_from_java_test.cpp +103 -0
  92. data/vendor/datasketches-cpp/kll/test/kll_sketch_serialize_for_java.cpp +62 -0
  93. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +5 -40
  94. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +76 -54
  95. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +66 -136
  96. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +14 -1
  97. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_deserialize_from_java_test.cpp +84 -0
  98. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_serialize_for_java.cpp +52 -0
  99. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +15 -39
  100. data/vendor/datasketches-cpp/req/include/req_common.hpp +7 -3
  101. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +2 -4
  102. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +105 -26
  103. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +50 -111
  104. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +14 -1
  105. data/vendor/datasketches-cpp/req/test/req_sketch_deserialize_from_java_test.cpp +55 -0
  106. data/vendor/datasketches-cpp/{tuple/include/array_of_doubles_intersection_impl.hpp → req/test/req_sketch_serialize_for_java.cpp} +12 -7
  107. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +3 -89
  108. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +4 -0
  109. data/vendor/datasketches-cpp/sampling/include/ebpps_sample.hpp +210 -0
  110. data/vendor/datasketches-cpp/sampling/include/ebpps_sample_impl.hpp +535 -0
  111. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch.hpp +281 -0
  112. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch_impl.hpp +531 -0
  113. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +89 -32
  114. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +33 -19
  115. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +13 -10
  116. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +23 -19
  117. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +55 -8
  118. data/vendor/datasketches-cpp/sampling/test/ebpps_allocation_test.cpp +96 -0
  119. data/vendor/datasketches-cpp/sampling/test/ebpps_sample_test.cpp +137 -0
  120. data/vendor/datasketches-cpp/sampling/test/ebpps_sketch_test.cpp +266 -0
  121. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_deserialize_from_java_test.cpp +81 -0
  122. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_serialize_for_java.cpp +54 -0
  123. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +33 -51
  124. data/vendor/datasketches-cpp/sampling/test/var_opt_union_deserialize_from_java_test.cpp +50 -0
  125. data/vendor/datasketches-cpp/sampling/test/var_opt_union_serialize_for_java.cpp +56 -0
  126. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -20
  127. data/vendor/datasketches-cpp/theta/CMakeLists.txt +1 -0
  128. data/vendor/datasketches-cpp/theta/include/bit_packing.hpp +6279 -0
  129. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
  130. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_theta_sketched_sets.hpp +7 -6
  131. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +14 -8
  132. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +60 -46
  133. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +20 -5
  134. data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +10 -4
  135. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +3 -1
  136. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +13 -5
  137. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +5 -5
  138. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +3 -3
  139. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity.hpp +2 -1
  140. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +1 -0
  141. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +1 -1
  142. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +180 -33
  143. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +430 -130
  144. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +17 -10
  145. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +10 -10
  146. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +3 -3
  147. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +21 -6
  148. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +13 -3
  149. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +15 -1
  150. data/vendor/datasketches-cpp/theta/test/bit_packing_test.cpp +80 -0
  151. data/vendor/datasketches-cpp/theta/test/theta_sketch_deserialize_from_java_test.cpp +57 -0
  152. data/vendor/datasketches-cpp/theta/test/theta_sketch_serialize_for_java.cpp +61 -0
  153. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +39 -188
  154. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +25 -0
  155. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +8 -7
  156. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +19 -144
  157. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b.hpp → array_tuple_a_not_b.hpp} +24 -16
  158. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b_impl.hpp → array_tuple_a_not_b_impl.hpp} +4 -4
  159. data/vendor/datasketches-cpp/tuple/include/array_tuple_intersection.hpp +65 -0
  160. data/vendor/datasketches-cpp/tuple/include/array_tuple_intersection_impl.hpp +31 -0
  161. data/vendor/datasketches-cpp/tuple/include/array_tuple_sketch.hpp +237 -0
  162. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_sketch_impl.hpp → array_tuple_sketch_impl.hpp} +40 -41
  163. data/vendor/datasketches-cpp/tuple/include/array_tuple_union.hpp +81 -0
  164. data/vendor/datasketches-cpp/tuple/include/array_tuple_union_impl.hpp +43 -0
  165. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +11 -2
  166. data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +17 -10
  167. data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +2 -1
  168. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +95 -32
  169. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +2 -1
  170. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +19 -11
  171. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +16 -1
  172. data/vendor/datasketches-cpp/tuple/test/aod_sketch_deserialize_from_java_test.cpp +76 -0
  173. data/vendor/datasketches-cpp/tuple/test/aod_sketch_serialize_for_java.cpp +62 -0
  174. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +5 -129
  175. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +85 -89
  176. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +3 -1
  177. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_deserialize_from_java_test.cpp +47 -0
  178. data/vendor/datasketches-cpp/{python/src/datasketches.cpp → tuple/test/tuple_sketch_serialize_for_java.cpp} +16 -30
  179. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +1 -1
  180. data/vendor/datasketches-cpp/version.cfg.in +1 -1
  181. metadata +61 -79
  182. data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
  183. data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
  184. data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
  185. data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
  186. data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
  187. data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
  188. data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
  189. data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
  190. data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
  191. data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
  192. data/vendor/datasketches-cpp/pyproject.toml +0 -23
  193. data/vendor/datasketches-cpp/python/CMakeLists.txt +0 -81
  194. data/vendor/datasketches-cpp/python/README.md +0 -85
  195. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +0 -104
  196. data/vendor/datasketches-cpp/python/include/py_serde.hpp +0 -113
  197. data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +0 -345
  198. data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +0 -354
  199. data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +0 -346
  200. data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +0 -463
  201. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +0 -403
  202. data/vendor/datasketches-cpp/python/pybind11Path.cmd +0 -21
  203. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +0 -90
  204. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +0 -128
  205. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -134
  206. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +0 -210
  207. data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +0 -68
  208. data/vendor/datasketches-cpp/python/src/py_serde.cpp +0 -111
  209. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +0 -204
  210. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +0 -215
  211. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +0 -172
  212. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +0 -490
  213. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +0 -173
  214. data/vendor/datasketches-cpp/python/tests/__init__.py +0 -16
  215. data/vendor/datasketches-cpp/python/tests/cpc_test.py +0 -64
  216. data/vendor/datasketches-cpp/python/tests/fi_test.py +0 -110
  217. data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -130
  218. data/vendor/datasketches-cpp/python/tests/kll_test.py +0 -125
  219. data/vendor/datasketches-cpp/python/tests/quantiles_test.py +0 -126
  220. data/vendor/datasketches-cpp/python/tests/req_test.py +0 -126
  221. data/vendor/datasketches-cpp/python/tests/theta_test.py +0 -146
  222. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +0 -148
  223. data/vendor/datasketches-cpp/python/tests/vo_test.py +0 -125
  224. data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
  225. data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
  226. data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
  227. data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
  228. data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
  229. data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +0 -67
  230. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
  231. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
  232. data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
  233. data/vendor/datasketches-cpp/setup.py +0 -110
  234. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
  235. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
  236. data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
  237. data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
  238. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +0 -52
  239. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +0 -81
  240. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +0 -43
  241. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +0 -1
  242. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
  243. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
  244. data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
  245. data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +0 -1
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datasketches
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-31 00:00:00.000000000 Z
11
+ date: 2023-11-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rice
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: 4.0.2
19
+ version: '4.1'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: 4.0.2
26
+ version: '4.1'
27
27
  description:
28
28
  email: andrew@ankane.org
29
29
  executables: []
@@ -47,8 +47,10 @@ files:
47
47
  - lib/datasketches.rb
48
48
  - lib/datasketches/version.rb
49
49
  - vendor/datasketches-cpp/CMakeLists.txt
50
+ - vendor/datasketches-cpp/CODE_OF_CONDUCT.md
51
+ - vendor/datasketches-cpp/CONTRIBUTING.md
52
+ - vendor/datasketches-cpp/Doxyfile
50
53
  - vendor/datasketches-cpp/LICENSE
51
- - vendor/datasketches-cpp/MANIFEST.in
52
54
  - vendor/datasketches-cpp/NOTICE
53
55
  - vendor/datasketches-cpp/README.md
54
56
  - vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in
@@ -65,6 +67,7 @@ files:
65
67
  - vendor/datasketches-cpp/common/include/kolmogorov_smirnov.hpp
66
68
  - vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp
67
69
  - vendor/datasketches-cpp/common/include/memory_operations.hpp
70
+ - vendor/datasketches-cpp/common/include/optional.hpp
68
71
  - vendor/datasketches-cpp/common/include/quantiles_sorted_view.hpp
69
72
  - vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp
70
73
  - vendor/datasketches-cpp/common/include/serde.hpp
@@ -72,10 +75,17 @@ files:
72
75
  - vendor/datasketches-cpp/common/test/CMakeLists.txt
73
76
  - vendor/datasketches-cpp/common/test/catch_runner.cpp
74
77
  - vendor/datasketches-cpp/common/test/integration_test.cpp
78
+ - vendor/datasketches-cpp/common/test/optional_test.cpp
75
79
  - vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp
76
80
  - vendor/datasketches-cpp/common/test/test_allocator.cpp
77
81
  - vendor/datasketches-cpp/common/test/test_allocator.hpp
78
82
  - vendor/datasketches-cpp/common/test/test_type.hpp
83
+ - vendor/datasketches-cpp/count/CMakeLists.txt
84
+ - vendor/datasketches-cpp/count/include/count_min.hpp
85
+ - vendor/datasketches-cpp/count/include/count_min_impl.hpp
86
+ - vendor/datasketches-cpp/count/test/CMakeLists.txt
87
+ - vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp
88
+ - vendor/datasketches-cpp/count/test/count_min_test.cpp
79
89
  - vendor/datasketches-cpp/cpc/CMakeLists.txt
80
90
  - vendor/datasketches-cpp/cpc/include/compression_data.hpp
81
91
  - vendor/datasketches-cpp/cpc/include/cpc_common.hpp
@@ -94,8 +104,15 @@ files:
94
104
  - vendor/datasketches-cpp/cpc/test/CMakeLists.txt
95
105
  - vendor/datasketches-cpp/cpc/test/compression_test.cpp
96
106
  - vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp
107
+ - vendor/datasketches-cpp/cpc/test/cpc_sketch_deserialize_from_java_test.cpp
108
+ - vendor/datasketches-cpp/cpc/test/cpc_sketch_serialize_for_java.cpp
97
109
  - vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp
98
110
  - vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp
111
+ - vendor/datasketches-cpp/density/CMakeLists.txt
112
+ - vendor/datasketches-cpp/density/include/density_sketch.hpp
113
+ - vendor/datasketches-cpp/density/include/density_sketch_impl.hpp
114
+ - vendor/datasketches-cpp/density/test/CMakeLists.txt
115
+ - vendor/datasketches-cpp/density/test/density_sketch_test.cpp
99
116
  - vendor/datasketches-cpp/fi/CMakeLists.txt
100
117
  - vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp
101
118
  - vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp
@@ -103,10 +120,9 @@ files:
103
120
  - vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp
104
121
  - vendor/datasketches-cpp/fi/test/CMakeLists.txt
105
122
  - vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp
123
+ - vendor/datasketches-cpp/fi/test/frequent_items_sketch_deserialize_from_java_test.cpp
124
+ - vendor/datasketches-cpp/fi/test/frequent_items_sketch_serialize_for_java.cpp
106
125
  - vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp
107
- - vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk
108
- - vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk
109
- - vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk
110
126
  - vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp
111
127
  - vendor/datasketches-cpp/hll/CMakeLists.txt
112
128
  - vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp
@@ -152,12 +168,8 @@ files:
152
168
  - vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp
153
169
  - vendor/datasketches-cpp/hll/test/TablesTest.cpp
154
170
  - vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp
155
- - vendor/datasketches-cpp/hll/test/array6_from_java.sk
156
- - vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk
157
- - vendor/datasketches-cpp/hll/test/compact_set_from_java.sk
158
- - vendor/datasketches-cpp/hll/test/list_from_java.sk
159
- - vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk
160
- - vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk
171
+ - vendor/datasketches-cpp/hll/test/hll_sketch_deserialize_from_java_test.cpp
172
+ - vendor/datasketches-cpp/hll/test/hll_sketch_serialize_for_java.cpp
161
173
  - vendor/datasketches-cpp/kll/CMakeLists.txt
162
174
  - vendor/datasketches-cpp/kll/include/kll_helper.hpp
163
175
  - vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp
@@ -165,46 +177,12 @@ files:
165
177
  - vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp
166
178
  - vendor/datasketches-cpp/kll/test/CMakeLists.txt
167
179
  - vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp
180
+ - vendor/datasketches-cpp/kll/test/kll_sketch_deserialize_from_java_test.cpp
168
181
  - vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk
169
- - vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk
182
+ - vendor/datasketches-cpp/kll/test/kll_sketch_serialize_for_java.cpp
170
183
  - vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp
171
184
  - vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp
172
185
  - vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp
173
- - vendor/datasketches-cpp/pyproject.toml
174
- - vendor/datasketches-cpp/python/CMakeLists.txt
175
- - vendor/datasketches-cpp/python/README.md
176
- - vendor/datasketches-cpp/python/datasketches/PySerDe.py
177
- - vendor/datasketches-cpp/python/datasketches/__init__.py
178
- - vendor/datasketches-cpp/python/include/py_serde.hpp
179
- - vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb
180
- - vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb
181
- - vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb
182
- - vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb
183
- - vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb
184
- - vendor/datasketches-cpp/python/pybind11Path.cmd
185
- - vendor/datasketches-cpp/python/src/__init__.py
186
- - vendor/datasketches-cpp/python/src/cpc_wrapper.cpp
187
- - vendor/datasketches-cpp/python/src/datasketches.cpp
188
- - vendor/datasketches-cpp/python/src/fi_wrapper.cpp
189
- - vendor/datasketches-cpp/python/src/hll_wrapper.cpp
190
- - vendor/datasketches-cpp/python/src/kll_wrapper.cpp
191
- - vendor/datasketches-cpp/python/src/ks_wrapper.cpp
192
- - vendor/datasketches-cpp/python/src/py_serde.cpp
193
- - vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp
194
- - vendor/datasketches-cpp/python/src/req_wrapper.cpp
195
- - vendor/datasketches-cpp/python/src/theta_wrapper.cpp
196
- - vendor/datasketches-cpp/python/src/vector_of_kll.cpp
197
- - vendor/datasketches-cpp/python/src/vo_wrapper.cpp
198
- - vendor/datasketches-cpp/python/tests/__init__.py
199
- - vendor/datasketches-cpp/python/tests/cpc_test.py
200
- - vendor/datasketches-cpp/python/tests/fi_test.py
201
- - vendor/datasketches-cpp/python/tests/hll_test.py
202
- - vendor/datasketches-cpp/python/tests/kll_test.py
203
- - vendor/datasketches-cpp/python/tests/quantiles_test.py
204
- - vendor/datasketches-cpp/python/tests/req_test.py
205
- - vendor/datasketches-cpp/python/tests/theta_test.py
206
- - vendor/datasketches-cpp/python/tests/vector_of_kll_test.py
207
- - vendor/datasketches-cpp/python/tests/vo_test.py
208
186
  - vendor/datasketches-cpp/quantiles/CMakeLists.txt
209
187
  - vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp
210
188
  - vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp
@@ -219,6 +197,8 @@ files:
219
197
  - vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.3.sk
220
198
  - vendor/datasketches-cpp/quantiles/test/kolmogorov_smirnov_test.cpp
221
199
  - vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp
200
+ - vendor/datasketches-cpp/quantiles/test/quantiles_sketch_deserialize_from_java_test.cpp
201
+ - vendor/datasketches-cpp/quantiles/test/quantiles_sketch_serialize_for_java.cpp
222
202
  - vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp
223
203
  - vendor/datasketches-cpp/req/CMakeLists.txt
224
204
  - vendor/datasketches-cpp/req/include/req_common.hpp
@@ -227,28 +207,32 @@ files:
227
207
  - vendor/datasketches-cpp/req/include/req_sketch.hpp
228
208
  - vendor/datasketches-cpp/req/include/req_sketch_impl.hpp
229
209
  - vendor/datasketches-cpp/req/test/CMakeLists.txt
230
- - vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk
231
- - vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk
232
- - vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk
233
- - vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk
234
- - vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk
235
210
  - vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp
211
+ - vendor/datasketches-cpp/req/test/req_sketch_deserialize_from_java_test.cpp
212
+ - vendor/datasketches-cpp/req/test/req_sketch_serialize_for_java.cpp
236
213
  - vendor/datasketches-cpp/req/test/req_sketch_test.cpp
237
214
  - vendor/datasketches-cpp/sampling/CMakeLists.txt
215
+ - vendor/datasketches-cpp/sampling/include/ebpps_sample.hpp
216
+ - vendor/datasketches-cpp/sampling/include/ebpps_sample_impl.hpp
217
+ - vendor/datasketches-cpp/sampling/include/ebpps_sketch.hpp
218
+ - vendor/datasketches-cpp/sampling/include/ebpps_sketch_impl.hpp
238
219
  - vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp
239
220
  - vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp
240
221
  - vendor/datasketches-cpp/sampling/include/var_opt_union.hpp
241
222
  - vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp
242
223
  - vendor/datasketches-cpp/sampling/test/CMakeLists.txt
243
- - vendor/datasketches-cpp/sampling/test/binaries_from_java.txt
224
+ - vendor/datasketches-cpp/sampling/test/ebpps_allocation_test.cpp
225
+ - vendor/datasketches-cpp/sampling/test/ebpps_sample_test.cpp
226
+ - vendor/datasketches-cpp/sampling/test/ebpps_sketch_test.cpp
244
227
  - vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp
228
+ - vendor/datasketches-cpp/sampling/test/var_opt_sketch_deserialize_from_java_test.cpp
229
+ - vendor/datasketches-cpp/sampling/test/var_opt_sketch_serialize_for_java.cpp
245
230
  - vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp
231
+ - vendor/datasketches-cpp/sampling/test/var_opt_union_deserialize_from_java_test.cpp
232
+ - vendor/datasketches-cpp/sampling/test/var_opt_union_serialize_for_java.cpp
246
233
  - vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp
247
- - vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk
248
- - vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk
249
- - vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk
250
- - vendor/datasketches-cpp/setup.py
251
234
  - vendor/datasketches-cpp/theta/CMakeLists.txt
235
+ - vendor/datasketches-cpp/theta/include/bit_packing.hpp
252
236
  - vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp
253
237
  - vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_theta_sketched_sets.hpp
254
238
  - vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp
@@ -275,30 +259,29 @@ files:
275
259
  - vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp
276
260
  - vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp
277
261
  - vendor/datasketches-cpp/theta/test/CMakeLists.txt
262
+ - vendor/datasketches-cpp/theta/test/bit_packing_test.cpp
278
263
  - vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp
279
- - vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk
280
264
  - vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk
281
265
  - vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk
282
- - vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk
283
266
  - vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk
284
267
  - vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk
285
- - vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk
286
- - vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk
287
268
  - vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp
288
269
  - vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp
289
270
  - vendor/datasketches-cpp/theta/test/theta_setop_test.cpp
271
+ - vendor/datasketches-cpp/theta/test/theta_sketch_deserialize_from_java_test.cpp
272
+ - vendor/datasketches-cpp/theta/test/theta_sketch_serialize_for_java.cpp
290
273
  - vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp
291
274
  - vendor/datasketches-cpp/theta/test/theta_union_test.cpp
292
- - vendor/datasketches-cpp/tox.ini
293
275
  - vendor/datasketches-cpp/tuple/CMakeLists.txt
294
- - vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp
295
- - vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp
296
- - vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp
297
- - vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection_impl.hpp
298
276
  - vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp
299
- - vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp
300
- - vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp
301
- - vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp
277
+ - vendor/datasketches-cpp/tuple/include/array_tuple_a_not_b.hpp
278
+ - vendor/datasketches-cpp/tuple/include/array_tuple_a_not_b_impl.hpp
279
+ - vendor/datasketches-cpp/tuple/include/array_tuple_intersection.hpp
280
+ - vendor/datasketches-cpp/tuple/include/array_tuple_intersection_impl.hpp
281
+ - vendor/datasketches-cpp/tuple/include/array_tuple_sketch.hpp
282
+ - vendor/datasketches-cpp/tuple/include/array_tuple_sketch_impl.hpp
283
+ - vendor/datasketches-cpp/tuple/include/array_tuple_union.hpp
284
+ - vendor/datasketches-cpp/tuple/include/array_tuple_union_impl.hpp
302
285
  - vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp
303
286
  - vendor/datasketches-cpp/tuple/include/tuple_a_not_b_impl.hpp
304
287
  - vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp
@@ -309,17 +292,16 @@ files:
309
292
  - vendor/datasketches-cpp/tuple/include/tuple_union.hpp
310
293
  - vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp
311
294
  - vendor/datasketches-cpp/tuple/test/CMakeLists.txt
312
- - vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk
313
- - vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk
314
- - vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk
315
- - vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk
316
- - vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk
295
+ - vendor/datasketches-cpp/tuple/test/aod_sketch_deserialize_from_java_test.cpp
296
+ - vendor/datasketches-cpp/tuple/test/aod_sketch_serialize_for_java.cpp
317
297
  - vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp
318
298
  - vendor/datasketches-cpp/tuple/test/engagement_test.cpp
319
299
  - vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp
320
300
  - vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp
321
301
  - vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp
322
302
  - vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp
303
+ - vendor/datasketches-cpp/tuple/test/tuple_sketch_deserialize_from_java_test.cpp
304
+ - vendor/datasketches-cpp/tuple/test/tuple_sketch_serialize_for_java.cpp
323
305
  - vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp
324
306
  - vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp
325
307
  - vendor/datasketches-cpp/version.cfg.in
@@ -335,14 +317,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
335
317
  requirements:
336
318
  - - ">="
337
319
  - !ruby/object:Gem::Version
338
- version: '2.7'
320
+ version: '3'
339
321
  required_rubygems_version: !ruby/object:Gem::Requirement
340
322
  requirements:
341
323
  - - ">="
342
324
  - !ruby/object:Gem::Version
343
325
  version: '0'
344
326
  requirements: []
345
- rubygems_version: 3.4.1
327
+ rubygems_version: 3.4.10
346
328
  signing_key:
347
329
  specification_version: 4
348
330
  summary: Sketch data structures for Ruby
@@ -1,23 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- [build-system]
19
- requires = ["wheel",
20
- "setuptools >= 30.3.0",
21
- "cmake >= 3.16",
22
- "pybind11[global] >= 2.6.0"]
23
- build-backend = "setuptools.build_meta"
@@ -1,81 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.18.0")
19
- find_package(Python3 COMPONENTS Interpreter Development.Module REQUIRED)
20
- else()
21
- find_package(Python3 COMPONENTS Interpreter Development REQUIRED)
22
- endif()
23
-
24
- # only Windows+MSVC seems to have trouble locating pybind11
25
- if (MSVC)
26
- execute_process(COMMAND cmd.exe /c ${CMAKE_CURRENT_SOURCE_DIR}/pybind11Path.cmd "${Python3_EXECUTABLE}"
27
- WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
28
- OUTPUT_STRIP_TRAILING_WHITESPACE
29
- OUTPUT_VARIABLE EXTRA_PACKAGE_PATH)
30
- set(CMAKE_PREFIX_PATH ${CMAKE_PREFIX_PATH} ${EXTRA_PACKAGE_PATH})
31
- endif()
32
-
33
- find_package(pybind11 CONFIG REQUIRED)
34
-
35
- pybind11_add_module(python MODULE EXCLUDE_FROM_ALL THIN_LTO)
36
-
37
- target_link_libraries(python
38
- PRIVATE
39
- common
40
- hll
41
- kll
42
- cpc
43
- fi
44
- theta
45
- sampling
46
- req
47
- quantiles
48
- pybind11::module
49
- )
50
-
51
- set_target_properties(python PROPERTIES
52
- PREFIX ""
53
- OUTPUT_NAME _datasketches
54
- )
55
-
56
- target_include_directories(python
57
- PUBLIC
58
- $<INSTALL_INTERFACE:$<INSTALL_PREFIX>/include>
59
- $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
60
- )
61
-
62
- # ensure we make a .so on Mac rather than .dylib
63
- if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
64
- set_target_properties(python PROPERTIES SUFFIX ".so")
65
- endif()
66
-
67
- target_sources(python
68
- PRIVATE
69
- src/datasketches.cpp
70
- src/hll_wrapper.cpp
71
- src/kll_wrapper.cpp
72
- src/cpc_wrapper.cpp
73
- src/fi_wrapper.cpp
74
- src/theta_wrapper.cpp
75
- src/vo_wrapper.cpp
76
- src/req_wrapper.cpp
77
- src/quantiles_wrapper.cpp
78
- src/ks_wrapper.cpp
79
- src/vector_of_kll.cpp
80
- src/py_serde.cpp
81
- )
@@ -1,85 +0,0 @@
1
- <img src="https://raw.githubusercontent.com/apache/datasketches-website/master/logos/svg/datasketches-HorizontalColor-TM.svg" width="75%" alt="Apache DataSketches Logo">
2
-
3
- # The Apache DataSketches Library for Python
4
-
5
- This is the official version of the [Apache DataSketches](https://datasketches.apache.org) Python library.
6
-
7
- In the analysis of big data there are often problem queries that don’t scale because they require huge compute resources and time to generate exact results. Examples include count distinct, quantiles, most-frequent items, joins, matrix computations, and graph analysis.
8
-
9
- If approximate results are acceptable, there is a class of specialized algorithms, called streaming algorithms, or sketches, that can produce results orders of magnitude faster and with mathematically proven error bounds. For interactive queries there may not be other viable alternatives, and in the case of real-time analysis, sketches are the only known solution.
10
-
11
- This package provides a variety of sketches as described below. Wherever a specific type of sketch exists in Apache DataSketches packages for other languages, the sketches will be portable between languages (for platforms with the same endianness).
12
-
13
- ## Building and Installation
14
-
15
- Once cloned, the library can be installed by running `python3 -m pip install .` in the project root directory -- not the python subdirectory -- which will also install the necessary dependencies, namely numpy and [pybind11[global]](https://github.com/pybind/pybind11).
16
-
17
- If you prefer to call the `setup.py` build script directly, which is discouraged, you must first install `pybind11[global]`, as well as any other dependencies listed under the build-system section in `pyproject.toml`.
18
-
19
- The library is also available from PyPI via `python3 -m pip install datasketches`.
20
-
21
- ## Usage
22
-
23
- Having installed the library, loading the Apache DataSketches Library in Python is simple: `import datasketches`.
24
-
25
- The unit tests are mostly structured in a tutorial style and can be used as a reference example for how to feed data into and query the different types of sketches.
26
-
27
- ## Available Sketch Classes
28
-
29
- - KLL (Absolute Error Quantiles)
30
- - `kll_ints_sketch`
31
- - `kll_floats_sketch`
32
- - `kll_doubles_sketch`
33
- - Quantiles (Absolute Error Quantiles, inferior algorithm)
34
- - `quantiles_ints_sketch`
35
- - `quantiles_floats_sketch`
36
- - `quantiles_doubles_sketch`
37
- - REQ (Relative Error Quantiles)
38
- - `req_ints_sketch`
39
- - `req_floats_sketch`
40
- - Frequent Items
41
- - `frequent_strings_sketch`
42
- - Error types are `frequent_items_error_type.{NO_FALSE_NEGATIVES | NO_FALSE_POSITIVES}`
43
- - Theta
44
- - `update_theta_sketch`
45
- - `compact_theta_sketch` (cannot be instantiated directly)
46
- - `theta_union`
47
- - `theta_intersection`
48
- - `theta_a_not_b`
49
- - HLL
50
- - `hll_sketch`
51
- - `hll_union`
52
- - Target HLL types are `tgt_hll_type.{HLL_4 | HLL_6 | HLL_8}`
53
- - CPC
54
- - `cpc_sketch`
55
- - `cpc_union`
56
- - VarOpt Sampling
57
- - `var_opt_sketch`
58
- - `var_opt_union`
59
- - Vector of KLL
60
- - `vector_of_kll_ints_sketches`
61
- - `vector_of_kll_floats_sketches`
62
- - Kolmogorov-Smirnov Test
63
- - `ks_test` applied to a pair of matched-type Absolute Error quantiles sketches
64
-
65
- ## Known Differences from C++
66
-
67
- The Python API largely mirrors the C++ API, with a few minor exceptions: The primary known differences are that Python on modern platforms does not support unsigned integer values or numeric values with fewer than 64 bits. As a result, you may not be able to produce identical sketches from within Python as you can with Java and C++. Loading those sketches after they have been serialized from another language will work as expected.
68
-
69
- The Vector of KLL object is currently exclusive to Python, and holds an array of independent KLL sketches. This is useful for creating a set of KLL sketches over a vector and has been designed to allow input as either a vector or a matrix of multiple vectors.
70
-
71
- We have also removed reliance on a builder class for theta sketches as Python allows named arguments to the constructor, not strictly positional arguments.
72
-
73
- ## Developer Instructions
74
-
75
- The only developer-specific instructions relate to running unit tests.
76
-
77
- ### Unit tests
78
-
79
- The Python unit tests are run via `tox`, with no arguments, from the project root directory -- not the python subdirectory. Tox creates a temporary virtual environment in which to build and run the unit tests. In the event you are missing the necessary package, tox may be installed with `python3 -m pip install --upgrade tox`.
80
-
81
- ## License
82
-
83
- The Apache DataSketches Library is distributed under an Apache 2.0 License.
84
-
85
- Precompiled binaries may be provided as a convenience and distributed through PyPI via [https://pypi.org/project/datasketches/](https://pypi.org/project/datasketches/). These binaries contain compiled code from [pybind11](https://github.com/pybind/pybind11), which is distributed under a BSD license.
@@ -1,104 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- from _datasketches import PyObjectSerDe
19
-
20
- import struct
21
-
22
- # This file provides several Python SerDe implementation examples.
23
- #
24
- # Each implementation must extend the PyObjectSerDe class and define
25
- # three methods:
26
- # * get_size(item) returns an int of the number of bytes needed to
27
- # serialize the given item
28
- # * to_bytes(item) returns a bytes object representing a serialized
29
- # version of the given item
30
- # * from_bytes(data, offset) takes a bytes object (data) and an offset
31
- # indicating where in the data array to start reading. The method
32
- # returns a tuple with the newly reconstructed object and the
33
- # total number of bytes beyond the offset read from the input data.
34
-
35
- # Implements a simple string-encoding scheme where a string is
36
- # written as <num_bytes> <string_contents>, with no null termination.
37
- # This format allows pre-allocating each string, at the cost of
38
- # additional storage. Using this format, the serialized string consumes
39
- # 4 + len(item) bytes.
40
- class PyStringsSerDe(PyObjectSerDe):
41
- def get_size(self, item):
42
- return int(4 + len(item))
43
-
44
- def to_bytes(self, item: str):
45
- b = bytearray()
46
- b.extend(len(item).to_bytes(4, 'little'))
47
- b.extend(map(ord,item))
48
- return bytes(b)
49
-
50
- def from_bytes(self, data: bytes, offset: int):
51
- num_chars = int.from_bytes(data[offset:offset+3], 'little')
52
- if (num_chars < 0 or num_chars > offset + len(data)):
53
- raise IndexError(f'num_chars read must be non-negative and not larger than the buffer. Found {num_chars}')
54
- str = data[offset+4:offset+4+num_chars].decode()
55
- return (str, 4+num_chars)
56
-
57
- # Implements an integer-encoding scheme where each integer is written
58
- # as a 32-bit (4 byte) little-endian value.
59
- class PyIntsSerDe(PyObjectSerDe):
60
- def get_size(self, item):
61
- return int(4)
62
-
63
- def to_bytes(self, item):
64
- return struct.pack('i', item)
65
-
66
- def from_bytes(self, data: bytes, offset: int):
67
- val = struct.unpack_from('i', data, offset)[0]
68
- return (val, 4)
69
-
70
-
71
- class PyLongsSerDe(PyObjectSerDe):
72
- def get_size(self, item):
73
- return int(8)
74
-
75
- def to_bytes(self, item):
76
- return struct.pack('l', item)
77
-
78
- def from_bytes(self, data: bytes, offset: int):
79
- val = struct.unpack_from('l', data, offset)[0]
80
- return (val, 8)
81
-
82
-
83
- class PyFloatsSerDe(PyObjectSerDe):
84
- def get_size(self, item):
85
- return int(4)
86
-
87
- def to_bytes(self, item):
88
- return struct.pack('f', item)
89
-
90
- def from_bytes(self, data: bytes, offset: int):
91
- val = struct.unpack_from('f', data, offset)[0]
92
- return (val, 4)
93
-
94
-
95
- class PyDoublesSerDe(PyObjectSerDe):
96
- def get_size(self, item):
97
- return int(8)
98
-
99
- def to_bytes(self, item):
100
- return struct.pack('d', item)
101
-
102
- def from_bytes(self, data: bytes, offset: int):
103
- val = struct.unpack_from('d', data, offset)[0]
104
- return (val, 8)