datasketches 0.3.2 → 0.4.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (237) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/NOTICE +1 -1
  4. data/README.md +0 -2
  5. data/ext/datasketches/cpc_wrapper.cpp +2 -2
  6. data/ext/datasketches/kll_wrapper.cpp +0 -10
  7. data/lib/datasketches/version.rb +1 -1
  8. data/lib/datasketches.rb +1 -1
  9. data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
  10. data/vendor/datasketches-cpp/CODE_OF_CONDUCT.md +3 -0
  11. data/vendor/datasketches-cpp/CONTRIBUTING.md +50 -0
  12. data/vendor/datasketches-cpp/Doxyfile +2827 -0
  13. data/vendor/datasketches-cpp/LICENSE +0 -76
  14. data/vendor/datasketches-cpp/NOTICE +1 -1
  15. data/vendor/datasketches-cpp/README.md +1 -3
  16. data/vendor/datasketches-cpp/common/CMakeLists.txt +12 -11
  17. data/vendor/datasketches-cpp/common/include/common_defs.hpp +11 -8
  18. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +0 -2
  19. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov.hpp +9 -6
  20. data/vendor/datasketches-cpp/common/include/optional.hpp +148 -0
  21. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view.hpp +95 -2
  22. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +1 -1
  23. data/vendor/datasketches-cpp/common/include/serde.hpp +69 -20
  24. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +1 -1
  25. data/vendor/datasketches-cpp/common/test/optional_test.cpp +85 -0
  26. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +14 -14
  27. data/vendor/datasketches-cpp/count/include/count_min.hpp +132 -78
  28. data/vendor/datasketches-cpp/count/include/count_min_impl.hpp +132 -152
  29. data/vendor/datasketches-cpp/count/test/CMakeLists.txt +11 -12
  30. data/vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp +61 -61
  31. data/vendor/datasketches-cpp/count/test/count_min_test.cpp +175 -178
  32. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +14 -20
  33. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +7 -4
  34. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +17 -17
  35. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +40 -40
  36. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +13 -10
  37. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +35 -11
  38. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +8 -8
  39. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -2
  40. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +5 -5
  41. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +20 -7
  42. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_deserialize_from_java_test.cpp +60 -0
  43. data/vendor/datasketches-cpp/{python/include/py_object_lt.hpp → cpc/test/cpc_sketch_serialize_for_java.cpp} +15 -14
  44. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +4 -29
  45. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +4 -4
  46. data/vendor/datasketches-cpp/density/include/density_sketch.hpp +29 -9
  47. data/vendor/datasketches-cpp/density/include/density_sketch_impl.hpp +1 -1
  48. data/vendor/datasketches-cpp/density/test/CMakeLists.txt +0 -1
  49. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +21 -9
  50. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +6 -4
  51. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +14 -1
  52. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_deserialize_from_java_test.cpp +95 -0
  53. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_serialize_for_java.cpp +83 -0
  54. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +3 -42
  55. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +2 -2
  56. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +3 -1
  57. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +3 -3
  58. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +5 -3
  59. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +4 -4
  60. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +3 -1
  61. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +0 -12
  62. data/vendor/datasketches-cpp/hll/include/hll.hpp +70 -57
  63. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +14 -1
  64. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +0 -68
  65. data/vendor/datasketches-cpp/hll/test/hll_sketch_deserialize_from_java_test.cpp +69 -0
  66. data/vendor/datasketches-cpp/hll/test/hll_sketch_serialize_for_java.cpp +52 -0
  67. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -4
  68. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +71 -50
  69. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +59 -130
  70. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +14 -1
  71. data/vendor/datasketches-cpp/kll/test/kll_sketch_deserialize_from_java_test.cpp +103 -0
  72. data/vendor/datasketches-cpp/kll/test/kll_sketch_serialize_for_java.cpp +62 -0
  73. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +3 -38
  74. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +68 -51
  75. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +62 -132
  76. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +14 -1
  77. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_deserialize_from_java_test.cpp +84 -0
  78. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_serialize_for_java.cpp +52 -0
  79. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +14 -38
  80. data/vendor/datasketches-cpp/req/include/req_common.hpp +7 -3
  81. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +2 -2
  82. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +97 -23
  83. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +48 -109
  84. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +14 -1
  85. data/vendor/datasketches-cpp/req/test/req_sketch_deserialize_from_java_test.cpp +55 -0
  86. data/vendor/datasketches-cpp/{tuple/include/array_of_doubles_intersection_impl.hpp → req/test/req_sketch_serialize_for_java.cpp} +12 -7
  87. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +3 -89
  88. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +4 -0
  89. data/vendor/datasketches-cpp/sampling/include/ebpps_sample.hpp +210 -0
  90. data/vendor/datasketches-cpp/sampling/include/ebpps_sample_impl.hpp +539 -0
  91. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch.hpp +281 -0
  92. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch_impl.hpp +531 -0
  93. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +69 -26
  94. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +3 -3
  95. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +10 -11
  96. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +4 -4
  97. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +55 -8
  98. data/vendor/datasketches-cpp/sampling/test/ebpps_allocation_test.cpp +96 -0
  99. data/vendor/datasketches-cpp/sampling/test/ebpps_sample_test.cpp +137 -0
  100. data/vendor/datasketches-cpp/sampling/test/ebpps_sketch_test.cpp +266 -0
  101. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_deserialize_from_java_test.cpp +81 -0
  102. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_serialize_for_java.cpp +54 -0
  103. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +0 -37
  104. data/vendor/datasketches-cpp/sampling/test/var_opt_union_deserialize_from_java_test.cpp +50 -0
  105. data/vendor/datasketches-cpp/sampling/test/var_opt_union_serialize_for_java.cpp +56 -0
  106. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -18
  107. data/vendor/datasketches-cpp/theta/include/bit_packing.hpp +2608 -2608
  108. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
  109. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_theta_sketched_sets.hpp +7 -6
  110. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +20 -5
  111. data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +10 -4
  112. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +1 -1
  113. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +13 -5
  114. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +5 -5
  115. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +3 -3
  116. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity.hpp +2 -1
  117. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +1 -0
  118. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +1 -1
  119. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +126 -27
  120. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +8 -8
  121. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +17 -10
  122. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -1
  123. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +3 -3
  124. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +5 -2
  125. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +11 -1
  126. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +14 -1
  127. data/vendor/datasketches-cpp/theta/test/theta_sketch_deserialize_from_java_test.cpp +57 -0
  128. data/vendor/datasketches-cpp/theta/test/theta_sketch_serialize_for_java.cpp +61 -0
  129. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +0 -188
  130. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +8 -7
  131. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +19 -144
  132. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b.hpp → array_tuple_a_not_b.hpp} +24 -16
  133. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b_impl.hpp → array_tuple_a_not_b_impl.hpp} +4 -4
  134. data/vendor/datasketches-cpp/tuple/include/array_tuple_intersection.hpp +65 -0
  135. data/vendor/datasketches-cpp/{python/include/py_object_ostream.hpp → tuple/include/array_tuple_intersection_impl.hpp} +7 -24
  136. data/vendor/datasketches-cpp/tuple/include/array_tuple_sketch.hpp +237 -0
  137. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_sketch_impl.hpp → array_tuple_sketch_impl.hpp} +40 -41
  138. data/vendor/datasketches-cpp/tuple/include/array_tuple_union.hpp +81 -0
  139. data/vendor/datasketches-cpp/tuple/include/array_tuple_union_impl.hpp +43 -0
  140. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +11 -2
  141. data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +17 -10
  142. data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +2 -1
  143. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +95 -32
  144. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +19 -11
  145. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +16 -1
  146. data/vendor/datasketches-cpp/tuple/test/aod_sketch_deserialize_from_java_test.cpp +76 -0
  147. data/vendor/datasketches-cpp/tuple/test/aod_sketch_serialize_for_java.cpp +62 -0
  148. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +5 -129
  149. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +85 -89
  150. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +3 -1
  151. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_deserialize_from_java_test.cpp +47 -0
  152. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_serialize_for_java.cpp +38 -0
  153. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +1 -1
  154. data/vendor/datasketches-cpp/version.cfg.in +1 -1
  155. metadata +47 -93
  156. data/vendor/datasketches-cpp/MANIFEST.in +0 -39
  157. data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
  158. data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
  159. data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
  160. data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
  161. data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
  162. data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
  163. data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
  164. data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
  165. data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
  166. data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
  167. data/vendor/datasketches-cpp/pyproject.toml +0 -23
  168. data/vendor/datasketches-cpp/python/CMakeLists.txt +0 -87
  169. data/vendor/datasketches-cpp/python/README.md +0 -85
  170. data/vendor/datasketches-cpp/python/datasketches/DensityWrapper.py +0 -87
  171. data/vendor/datasketches-cpp/python/datasketches/KernelFunction.py +0 -35
  172. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +0 -110
  173. data/vendor/datasketches-cpp/python/datasketches/TuplePolicy.py +0 -77
  174. data/vendor/datasketches-cpp/python/datasketches/TupleWrapper.py +0 -205
  175. data/vendor/datasketches-cpp/python/datasketches/__init__.py +0 -38
  176. data/vendor/datasketches-cpp/python/include/kernel_function.hpp +0 -98
  177. data/vendor/datasketches-cpp/python/include/py_serde.hpp +0 -113
  178. data/vendor/datasketches-cpp/python/include/quantile_conditional.hpp +0 -104
  179. data/vendor/datasketches-cpp/python/include/tuple_policy.hpp +0 -136
  180. data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +0 -345
  181. data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +0 -354
  182. data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +0 -346
  183. data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +0 -463
  184. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +0 -403
  185. data/vendor/datasketches-cpp/python/pybind11Path.cmd +0 -21
  186. data/vendor/datasketches-cpp/python/src/__init__.py +0 -18
  187. data/vendor/datasketches-cpp/python/src/count_wrapper.cpp +0 -101
  188. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +0 -76
  189. data/vendor/datasketches-cpp/python/src/datasketches.cpp +0 -58
  190. data/vendor/datasketches-cpp/python/src/density_wrapper.cpp +0 -95
  191. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +0 -182
  192. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -126
  193. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +0 -158
  194. data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +0 -68
  195. data/vendor/datasketches-cpp/python/src/py_serde.cpp +0 -112
  196. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +0 -155
  197. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +0 -154
  198. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +0 -166
  199. data/vendor/datasketches-cpp/python/src/tuple_wrapper.cpp +0 -215
  200. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +0 -490
  201. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +0 -173
  202. data/vendor/datasketches-cpp/python/tests/__init__.py +0 -16
  203. data/vendor/datasketches-cpp/python/tests/count_min_test.py +0 -86
  204. data/vendor/datasketches-cpp/python/tests/cpc_test.py +0 -64
  205. data/vendor/datasketches-cpp/python/tests/density_test.py +0 -93
  206. data/vendor/datasketches-cpp/python/tests/fi_test.py +0 -149
  207. data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -129
  208. data/vendor/datasketches-cpp/python/tests/kll_test.py +0 -159
  209. data/vendor/datasketches-cpp/python/tests/quantiles_test.py +0 -160
  210. data/vendor/datasketches-cpp/python/tests/req_test.py +0 -159
  211. data/vendor/datasketches-cpp/python/tests/theta_test.py +0 -148
  212. data/vendor/datasketches-cpp/python/tests/tuple_test.py +0 -206
  213. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +0 -148
  214. data/vendor/datasketches-cpp/python/tests/vo_test.py +0 -132
  215. data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
  216. data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
  217. data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
  218. data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
  219. data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
  220. data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +0 -67
  221. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
  222. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
  223. data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
  224. data/vendor/datasketches-cpp/setup.py +0 -110
  225. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
  226. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
  227. data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
  228. data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
  229. data/vendor/datasketches-cpp/tox.ini +0 -26
  230. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +0 -52
  231. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +0 -81
  232. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +0 -43
  233. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +0 -1
  234. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
  235. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
  236. data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
  237. data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +0 -1
@@ -1,463 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "markdown",
5
- "metadata": {},
6
- "source": [
7
- "## KLL Sketch Examples"
8
- ]
9
- },
10
- {
11
- "cell_type": "markdown",
12
- "metadata": {},
13
- "source": [
14
- "### Basic Sketch Usage"
15
- ]
16
- },
17
- {
18
- "cell_type": "code",
19
- "execution_count": 1,
20
- "metadata": {},
21
- "outputs": [],
22
- "source": [
23
- "from datasketches import kll_floats_sketch, kll_ints_sketch"
24
- ]
25
- },
26
- {
27
- "cell_type": "markdown",
28
- "metadata": {},
29
- "source": [
30
- "Options are a `kll_floats_sketch` or `kll_ints_sketch`. We'll use the former so we can draw samples from a Gaussian distribution. We start by creating a sketch with $k=200$, which gives a normalized rank error of about 1.65%, and feeding in 1 million points."
31
- ]
32
- },
33
- {
34
- "cell_type": "code",
35
- "execution_count": 2,
36
- "metadata": {},
37
- "outputs": [],
38
- "source": [
39
- "n = 1000000\n",
40
- "kll = kll_floats_sketch(200)\n",
41
- "from numpy.random import randn\n",
42
- "for i in range(0, n):\n",
43
- " kll.update(randn()) "
44
- ]
45
- },
46
- {
47
- "cell_type": "markdown",
48
- "metadata": {},
49
- "source": [
50
- "Since the data is distributed as $\\cal{N}(0,1)$, 0.0 should be near the median rank (0.5)"
51
- ]
52
- },
53
- {
54
- "cell_type": "code",
55
- "execution_count": 3,
56
- "metadata": {},
57
- "outputs": [
58
- {
59
- "data": {
60
- "text/plain": [
61
- "0.497608"
62
- ]
63
- },
64
- "execution_count": 3,
65
- "metadata": {},
66
- "output_type": "execute_result"
67
- }
68
- ],
69
- "source": [
70
- "kll.get_rank(0.0)"
71
- ]
72
- },
73
- {
74
- "cell_type": "markdown",
75
- "metadata": {},
76
- "source": [
77
- "And the median should also be near 0.0"
78
- ]
79
- },
80
- {
81
- "cell_type": "code",
82
- "execution_count": 4,
83
- "metadata": {},
84
- "outputs": [
85
- {
86
- "data": {
87
- "text/plain": [
88
- "0.003108405973762274"
89
- ]
90
- },
91
- "execution_count": 4,
92
- "metadata": {},
93
- "output_type": "execute_result"
94
- }
95
- ],
96
- "source": [
97
- "kll.get_quantile(0.5)"
98
- ]
99
- },
100
- {
101
- "cell_type": "markdown",
102
- "metadata": {},
103
- "source": [
104
- "We track the min and max values as well. They are stored separately from the quantile data so we can always determine the full _empirical_ data range. In this case they should be very roughly symmetric around 0.0. We can query these values explicitly, or implicitly by asking for the values at ranks 0.0 and 1.0."
105
- ]
106
- },
107
- {
108
- "cell_type": "code",
109
- "execution_count": 5,
110
- "metadata": {},
111
- "outputs": [
112
- {
113
- "data": {
114
- "text/plain": [
115
- "[-4.6000142097473145, 4.779754638671875]"
116
- ]
117
- },
118
- "execution_count": 5,
119
- "metadata": {},
120
- "output_type": "execute_result"
121
- }
122
- ],
123
- "source": [
124
- "[kll.get_min_value(), kll.get_max_value()]"
125
- ]
126
- },
127
- {
128
- "cell_type": "code",
129
- "execution_count": 6,
130
- "metadata": {},
131
- "outputs": [
132
- {
133
- "data": {
134
- "text/plain": [
135
- "[-4.6000142097473145, 4.779754638671875]"
136
- ]
137
- },
138
- "execution_count": 6,
139
- "metadata": {},
140
- "output_type": "execute_result"
141
- }
142
- ],
143
- "source": [
144
- "kll.get_quantiles([0.0, 1.0])"
145
- ]
146
- },
147
- {
148
- "cell_type": "markdown",
149
- "metadata": {},
150
- "source": [
151
- "And out of curiosity, we can check how many items the sketch has seen and how many it is retaining"
152
- ]
153
- },
154
- {
155
- "cell_type": "code",
156
- "execution_count": 7,
157
- "metadata": {},
158
- "outputs": [
159
- {
160
- "data": {
161
- "text/plain": [
162
- "1000000"
163
- ]
164
- },
165
- "execution_count": 7,
166
- "metadata": {},
167
- "output_type": "execute_result"
168
- }
169
- ],
170
- "source": [
171
- "kll.get_n()"
172
- ]
173
- },
174
- {
175
- "cell_type": "code",
176
- "execution_count": 8,
177
- "metadata": {},
178
- "outputs": [
179
- {
180
- "data": {
181
- "text/plain": [
182
- "614"
183
- ]
184
- },
185
- "execution_count": 8,
186
- "metadata": {},
187
- "output_type": "execute_result"
188
- }
189
- ],
190
- "source": [
191
- "kll.get_num_retained()"
192
- ]
193
- },
194
- {
195
- "cell_type": "markdown",
196
- "metadata": {},
197
- "source": [
198
- "Finally, we can serialize the sketch for archiving, and reconstruct it later. Note that the serialized image does _not_ contain information on whether it is a floats or ints sketch."
199
- ]
200
- },
201
- {
202
- "cell_type": "code",
203
- "execution_count": 9,
204
- "metadata": {},
205
- "outputs": [
206
- {
207
- "data": {
208
- "text/plain": [
209
- "2536"
210
- ]
211
- },
212
- "execution_count": 9,
213
- "metadata": {},
214
- "output_type": "execute_result"
215
- }
216
- ],
217
- "source": [
218
- "sk_bytes = kll.serialize()\n",
219
- "len(sk_bytes)"
220
- ]
221
- },
222
- {
223
- "cell_type": "code",
224
- "execution_count": 10,
225
- "metadata": {},
226
- "outputs": [
227
- {
228
- "name": "stdout",
229
- "output_type": "stream",
230
- "text": [
231
- "### KLL sketch summary:\n",
232
- " K : 200\n",
233
- " min K : 200\n",
234
- " M : 8\n",
235
- " N : 1000000\n",
236
- " Epsilon : 1.33%\n",
237
- " Epsilon PMF : 1.65%\n",
238
- " Empty : false\n",
239
- " Estimation mode: true\n",
240
- " Levels : 13\n",
241
- " Sorted : true\n",
242
- " Capacity items : 617\n",
243
- " Retained items : 614\n",
244
- " Storage bytes : 2536\n",
245
- " Min value : -4.6\n",
246
- " Max value : 4.78\n",
247
- "### End sketch summary\n",
248
- "\n"
249
- ]
250
- }
251
- ],
252
- "source": [
253
- "kll2 = kll_floats_sketch.deserialize(sk_bytes)\n",
254
- "print(kll2)"
255
- ]
256
- },
257
- {
258
- "cell_type": "markdown",
259
- "metadata": {},
260
- "source": [
261
- "### Merging Sketches"
262
- ]
263
- },
264
- {
265
- "cell_type": "markdown",
266
- "metadata": {},
267
- "source": [
268
- "KLL sketches have a `merge()` operation to combine sketches. The resulting sketch will have no worse error bounds than if the full data had been sent to a single sketch."
269
- ]
270
- },
271
- {
272
- "cell_type": "markdown",
273
- "metadata": {},
274
- "source": [
275
- "Our previous sketch used $\\cal{N}(0,1)$, so now we'll generate a shifted Gaussian distributed as $\\cal{N}(4,1)$. For added variety, we can use half as many points. The next section will generate a plot, so we will defer queries of the merged sketch to that section."
276
- ]
277
- },
278
- {
279
- "cell_type": "code",
280
- "execution_count": 12,
281
- "metadata": {},
282
- "outputs": [],
283
- "source": [
284
- "sk2 = kll_floats_sketch(200)\n",
285
- "for i in range(0, int(n/2)):\n",
286
- " sk2.update(4 + randn())"
287
- ]
288
- },
289
- {
290
- "cell_type": "code",
291
- "execution_count": 13,
292
- "metadata": {},
293
- "outputs": [
294
- {
295
- "name": "stdout",
296
- "output_type": "stream",
297
- "text": [
298
- "### KLL sketch summary:\n",
299
- " K : 200\n",
300
- " min K : 200\n",
301
- " M : 8\n",
302
- " N : 1500000\n",
303
- " Epsilon : 1.33%\n",
304
- " Epsilon PMF : 1.65%\n",
305
- " Empty : false\n",
306
- " Estimation mode: true\n",
307
- " Levels : 13\n",
308
- " Sorted : false\n",
309
- " Capacity items : 617\n",
310
- " Retained items : 580\n",
311
- " Storage bytes : 2400\n",
312
- " Min value : -4.6\n",
313
- " Max value : 9.06\n",
314
- "### End sketch summary\n",
315
- "\n"
316
- ]
317
- }
318
- ],
319
- "source": [
320
- "kll.merge(sk2)\n",
321
- "print(kll)"
322
- ]
323
- },
324
- {
325
- "cell_type": "markdown",
326
- "metadata": {},
327
- "source": [
328
- "### Generating Histograms"
329
- ]
330
- },
331
- {
332
- "cell_type": "markdown",
333
- "metadata": {},
334
- "source": [
335
- "The KLL sketch allows us to compute histograms via the probability mass function (pmf). Since histograms are a typical plot type when visualizing data distributions, we will create such a figure. To instead create a cumulative distribution function (cdf) from the sketch, simply replace the call to `get_pmf()` with `get_cdf()`."
336
- ]
337
- },
338
- {
339
- "cell_type": "markdown",
340
- "metadata": {},
341
- "source": [
342
- "We want our x-axis to have evenly distributed bins, so the first step is to split the empirical data range\n",
343
- "into a set of bins."
344
- ]
345
- },
346
- {
347
- "cell_type": "code",
348
- "execution_count": 14,
349
- "metadata": {},
350
- "outputs": [],
351
- "source": [
352
- "xmin = kll.get_min_value()\n",
353
- "num_splits = 30\n",
354
- "step = (kll.get_max_value() - xmin) / num_splits\n",
355
- "splits = [xmin + (i*step) for i in range(0, num_splits)]"
356
- ]
357
- },
358
- {
359
- "cell_type": "markdown",
360
- "metadata": {},
361
- "source": [
362
- "`get_pmf()` returns the probability mass in the range $(x_{i-1}, x_i]$, for each bin $i$. If we use the minimum value for $x_{i-1}$ this covers the low end, but `get_pmf()` also returns an extra bin with all mass greater than the last-provided split point. As a result, the pmf array is 1 larger than the list of split points. We need to be sure to append a value to the split points for plotting."
363
- ]
364
- },
365
- {
366
- "cell_type": "code",
367
- "execution_count": 15,
368
- "metadata": {},
369
- "outputs": [],
370
- "source": [
371
- "pmf = kll.get_pmf(splits)\n",
372
- "x = splits # this will hold the x-axis values, so need to append the max value\n",
373
- "x.append(kll.get_max_value())"
374
- ]
375
- },
376
- {
377
- "cell_type": "markdown",
378
- "metadata": {},
379
- "source": [
380
- "We need some plotting-related imports and options"
381
- ]
382
- },
383
- {
384
- "cell_type": "code",
385
- "execution_count": 16,
386
- "metadata": {},
387
- "outputs": [],
388
- "source": [
389
- "import seaborn as sns\n",
390
- "import matplotlib.pyplot as plt\n",
391
- "%matplotlib inline\n",
392
- "sns.set(color_codes=True)"
393
- ]
394
- },
395
- {
396
- "cell_type": "markdown",
397
- "metadata": {},
398
- "source": [
399
- "Using a negative width in the plot gives right-aligned bins, which matches the bin definition noted earlier."
400
- ]
401
- },
402
- {
403
- "cell_type": "code",
404
- "execution_count": 17,
405
- "metadata": {},
406
- "outputs": [
407
- {
408
- "data": {
409
- "text/plain": [
410
- "<BarContainer object of 31 artists>"
411
- ]
412
- },
413
- "execution_count": 17,
414
- "metadata": {},
415
- "output_type": "execute_result"
416
- },
417
- {
418
- "data": {
419
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAD7CAYAAABpJS8eAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAUgklEQVR4nO3dfYxc11nH8e+u7V3b8Tpt3aliJyQISh8UBHFoCCp2oCgG1IjUlCSNFKM0ahs3qigvSkVVxYHwkgoaBVcWKVQJqgPutrSuqJvWSSs3hhSqNKWKE4mkj2glAjhGmA0Q240d22v+mLvRdLu7c3d3dmdmz/cjWZp7zrnrZ95+e/fMvWcGzp07hyRp6RvsdgGSpMVh4EtSIQx8SSqEgS9JhTDwJakQy7tdwDSGgZ8CjgBnu1yLJPWLZcB64BvAqcmdvRr4PwV8tdtFSFKfugr4h8mNvRr4RwD+539OMD7eW9cJrFu3hrGx490uoxZrXRj9VCv0V73WOj+DgwO8+tXnQZWhk9UK/Ii4CdgBDAE7M/O+acY9CBzMzN3V9ibgI8AKYAx4Z2Y+V+O/PAswPn6u5wIf6MmapmOtC6OfaoX+qtdaO2LKqfC2H9pGxIXA3cBm4DJge0RcOmnMhoh4CLhh0u6fAN6VmRur27vmULgkqQPqnKWzBXg0M1/IzBPAXuD6SWO2AfuAT080RMQwsCMzn66angYunn/JkqS5qDOls4HvnQ86AlzZOiAz7wGIiM0tbaeAPVX7IHAX8Ln5lStJmqs6gT8wRdt43f8gIoaAB6v/60N194PmhyK9qNEY6XYJtVnrwuinWqG/6rXWhVMn8A/TPMVnwnrg+To/PCLWAJ+n+YHt1sw8PZvixsaO99yHIo3GCEePHut2GbVY68Lop1qhv+q11vkZHByY8UC5TuAfAO6KiAZwArgO2F7z/98DfBt4T2b2VnJLUmHafmibmYeBO4CDwCFgNDOfiIj9EXHFdPtFxOXAVmAT8GREHIqI/R2qW5I0S7XOw8/MUWB0Uts1U4y7peX2k0w9/6+CjKxdxcrhmV9mJ0+dWaRqpLL16pW2WiJWDi/n2tv3zTjmoXu3LlI1UtlcLVOSCuERvnrCy6fPtj3F7eSpMxx78aVFqkhaegx89YShFctqTf301klwUn9xSkeSCmHgS1IhDHxJKoSBL0mFMPAlqRAGviQVwsCXpEIY+JJUCANfkgph4EtSIQx8SSqEgS9JhTDwJakQBr4kFcLAl6RCGPiSVAgDX5IKYeBLUiEMfEkqhIEvSYUw8CWpEAa+JBVieZ1BEXETsAMYAnZm5n3TjHsQOJiZu6vti4E9wOuABLZl5vEO1K1CjaxdxcrhmV+2J0+d4diLLy1SRVL/aBv4EXEhcDfwRuAU8LWIOJiZz7SM2QB8DLgaONiy+0eBj2bmpyLiTuBO4AMdrF+FWTm8nGtv3zfjmIfu3cqxRapH6id1pnS2AI9m5guZeQLYC1w/acw2YB/w6YmGiFgB/Gw1HmA3cMN8C5YkzU2dKZ0NwJGW7SPAla0DMvMegIjY3NL8WuDFzDzTst9Fcy9VkjQfdQJ/YIq28QXc7xXr1q2ZzfBF02iMdLuE2vqp1k5a6Pvdb49rP9VrrQunTuAfBq5q2V4PPF9jv6PA2ohYlplnZ7HfK8bGjjM+fm42uyy4RmOEo0f7Y4a4F2rt1htiIe93Lzyus9FP9Vrr/AwODsx4oFxnDv8AcHVENCJiNXAd8Ei7nTLzNPBV4Maq6Wbg4Rr/nyRpAbQN/Mw8DNxB8+ybQ8BoZj4REfsj4oo2u78X2B4Rz9D8K2HHfAuWJM1NrfPwM3MUGJ3Uds0U426ZtP0c8Oa5lydJ6hSvtJWkQhj4klQIA1+SCmHgS1IhDHxJKoSBL0mFMPAlqRAGviQVwsCXpEIY+JJUCANfkgph4EtSIWotniZNVvfLxCX1DgNfc1L3y8Ql
9Q6ndCSpEAa+JBXCwJekQhj4klQIA1+SCmHgS1IhDHxJKoSBL0mFMPAlqRAGviQVwsCXpEIY+JJUCANfkgpRa7XMiLgJ2AEMATsz875J/RuB+4HzgceA2zLzTET8IPBXwFrgf4F3ZOZznStfklRX2yP8iLgQuBvYDFwGbI+ISycN2wO8LzPfAAwAt1btfwh8MjM3Ap+tfo4kqQvqTOlsAR7NzBcy8wSwF7h+ojMiLgFWZebjVdNu4Ibq9jKaR/cA5wEvdaJoSdLs1ZnS2QAcadk+AlzZpv+i6vadwNci4jdoTge9aTbFrVu3ZjbDF02jMdLtEmrrp1o7aaHvd789rv1Ur7UunDqBPzBF23jN/geB7Zm5LyKuA/42In4iM8/VKW5s7Djj47WGLppGY4SjR491u4xaFrLWXn+hL+Rz1E+vAeiveq11fgYHB2Y8UK4zpXMYuKBlez3wfLv+iGgAP5qZ+wAy87PVuNfWK12S1El1Av8AcHVENCJiNXAd8MhEZ3XWzcmI2FQ13Qw8DPx31b4ZoOo/lplHO3kHJEn1tA38zDwM3AEcBA4Bo5n5RETsj4grqmHbgJ0R8SzND2d3VdM2vwrcGxFPAx+m+ctCktQFtc7Dz8xRYHRS2zUtt5/iez/InWh/AvjpedYoSeoAr7SVpEIY+JJUCANfkgph4EtSIQx8SSqEgS9JhTDwJakQBr4kFcLAl6RCGPiSVAgDX5IKYeBLUiEMfEkqhIEvSYUw8CWpEAa+JBXCwJekQtT6xiupn7x8+iyNxkjbcSdPneHYiy8tQkVSbzDwteQMrVjGtbfvazvuoXu3cmwR6pF6hVM6klQIA1+SCmHgS1IhDHxJKoSBL0mFMPAlqRAGviQVotZ5+BFxE7ADGAJ2ZuZ9k/o3AvcD5wOPAbdl5pmIWA88AGwAvgtsy8x/7Vz5kqS62h7hR8SFwN3AZuAyYHtEXDpp2B7gfZn5BmAAuLVq/2vgocy8vLr9J50qXJI0O3WmdLYAj2bmC5l5AtgLXD/RGRGXAKsy8/GqaTdwQ0S8luYviI9V7R+n+VeCJKkL6kzpbACOtGwfAa5s038R8MPAvwE7I+Lnq9u/Ppvi1q1bM5vhi6bOOi29Yra1vnz6LEMrli1QNb1nrs9lr7wG6j5f579qdd88r73y2NbRT7VCvcAfmKJtvEb/cuBy4Pcy87ci4t3Ag8Cb6xY3Nnac8fFzdYcvikZjhKNH+2MFlrnU2miM1F6HZimYy3PZS6+B2TxfvVLzTHrpsW2nF2sdHByY8UC5zpTOYeCClu31wPM1+v8TOJaZX6jaR/nevwwkSYuoTuAfAK6OiEZErAauAx6Z6MzM54CTEbGparoZeDgzvwMcjoi3VO3XAt/sXOmS6ppYMrrdv5G1q7pdqhZQ2ymdzDwcEXcAB2melvlAZj4REfuB383MfwK2AfdHxAjwJLCr2v1twMci4h7gReAdC3EnJM3MJaMFNc/Dz8xRmlMyrW3XtNx+iimmazIzmcWcvSRp4XilrSQVwsCXpEIY+JJUCANfkgph4EtSIQx8SSqEgS9JhTDwJakQBr4kFcLAl6RCGPiSVAgDX5IKYeBLUiEMfEkqhIEvSYUw8CWpELW+AEXS4htZu4qVw75F1Tm+mqQetXJ4eduvJXzo3q2LVI2WAqd0JKkQBr4kFcLAl6RCOIcv6RUvnz5LozEy45iTp85w7MWXFqkidZKBL+kVQyuW1fqg+Ngi1aPOckpHkgph4EtSIQx8SSpErTn8iLgJ2AEMATsz875J/RuB+4HzgceA2zLzTEv/5cDjmTncqcIlSbPT9gg/Ii4E7gY2A5cB2yPi0knD9gDvy8w3AAPArS37rwb+jOYvC0lSl9SZ0tkCPJqZL2TmCWAvcP1EZ0RcAqzKzMerpt3ADS373wvs7Ey5kqS5qhP4G4AjLdtHgIvq9EfEW4HVmbl3nnVKkuapzhz+wBRt4+36I+ICmvP+W+ZSGMC6dWvmuuuCandh
Si/pp1q7Ya6PT+mP60Le/356bPupVqgX+IeBq1q21wPPT+q/YIr+XwbWAY9FBAARcQi4KjNrXbcxNnac8fFzdYYumkZjhKNH++Oyk7nU2m8v4Pmay3O5WK+BXn4uFur+L/X310IbHByY8UC5TuAfAO6KiAZwArgO2D7RmZnPRcTJiNiUmf8I3Aw8nJkPAA9MjIuIc5m5cY73Q5I0T23n8DPzMHAHcBA4BIxm5hMRsT8irqiGbQN2RsSzwHnAroUqWJI0N7XOw8/MUWB0Uts1LbefAq5s8zOmmuuXJC0Sr7SVpEIY+JJUCANfkgrhevgqVp0v+4DOf+HHyNpVrBye+a138tSZGfuluTDwVaw6X/YBnf/Cj5XDy2t9yYjUaQZ+IeocVUpa2kyAQtQ5qgSPLKWlzA9tJakQBr4kFcLAl6RCGPiSVAgDX5IK4Vk6kmat7sVjnbxgTfNn4EuatboXj/XW14PIKR1JKoSBL0mFMPAlqRDO4UttTLWq5uTtidUtXQVTvczAl9qos6rmxBpEroKpXuaUjiQVwsCXpEIY+JJUCANfkgph4EtSIQx8SSqEgS9Jhah1Hn5E3ATsAIaAnZl536T+jcD9wPnAY8BtmXkmIjYBHwFWAGPAOzPzuQ7WL0mqqe0RfkRcCNwNbAYuA7ZHxKWThu0B3peZbwAGgFur9k8A78rMjdXtXZ0qXJI0O3WmdLYAj2bmC5l5AtgLXD/RGRGXAKsy8/GqaTdwQ0QMAzsy8+mq/Wng4o5VLkmalTpTOhuAIy3bR4Ar2/RflJmnaB75ExGDwF3A5+ZTrCRp7uoE/sAUbeN1+yNiCHiw+r8+NJvi1q1bM5vhi2bywlm9rJ9q1dIzl9dfP71m+6lWqBf4h4GrWrbXA89P6r9gqv6IWAN8nuYHtlsz8/RsihsbO874+LnZ7LLgGo0Rjh7tj+/xaa21316YWhpm+17p1/dXrxgcHJjxQLnOHP4B4OqIaETEauA64JGJzuqsm5PVGTkANwMPV7f3AN8G3l5N8UiSuqRt4GfmYeAO4CBwCBjNzCciYn9EXFEN2wbsjIhngfOAXRFxObAV2AQ8GRGHImL/gtwLSVJbtc7Dz8xRYHRS2zUtt5/iez/IBXiSqef3JUld4JW2klQIA1+SCmHgS1IhDHxJKoRfYi5pwYysXcXK4Zlj5uSpMxx78aVFqqhsBr6kBbNyeDnX3r5vxjEP3buV3rp8aeky8JeAmY6ivMJW0gQDfwmoexQlqWx+aCtJhTDwJakQBr4kFcLAl6RCGPiSVAgDX5IKYeBLUiEMfEkqhBdeSeq61qvFp7s63DV35s/Al9R1rrmzOJzSkaRCGPiSVAgDX5IKYeBLUiEMfEkqhGfp9LC6Xw8nSXUY+D3MLzaR1ElO6UhSIQx8SSpErSmdiLgJ2AEMATsz875J/RuB+4HzgceA2zLzTERcDOwBXgcksC0zj3ewfklSTW0DPyIuBO4G3gicAr4WEQcz85mWYXuAd2fm4xHxl8CtwJ8DHwU+mpmfiog7gTuBD3T6TvQbP4yVZu/l02enXWen1amXzzI8tKztuBLX5qlzhL8FeDQzXwCIiL3A9cAfVNuXAKsy8/Fq/G7g9yPiAeBngV9paf976gX+MoDBwYFad2KxzbeulcPLedcffXnGMX+54xcBeN2rV9X6mXXGdfJn9fq4Xq6t0+N6ubZOjhtasazt+waa7526407M873caxnVUs+Uv/EGzp07N+MPiIgPAudl5o5q+93AlZm5vdp+E3BPZm6utl8P7Ad+DvhGZl5UtS8HvpuZQzXq3gx8tcY4SdL3uwr4h8mNdY7wp/oVNl6jv91+M/kGzYKPAGdr7iNJpVsGrKeZod+nTuAfphm+E9YDz0/qv2CK/qPA2ohYlplnp9hvJqeY4reTJKmt70zXUee0zAPA1RHRiIjVwHXAIxOdmfkccDIiNlVNNwMPZ+ZpmtMyN7a2z6F4SVIHtA38zDwM3AEcBA4Bo5n5
RETsj4grqmHbgJ0R8SxwHrCran8vsD0inqH5V8KOTt8BSVI9bT+0lSQtDV5pK0mFMPAlqRAGviQVwsCXpEK4Hv4cRcTlwOOZOdztWqZTnSr7EWAFMAa8szqNtqe0W5yvl0TE7wFvrza/mJm/08166oiIe4BGZt7S7VqmExHXAnfRPMvvS5n5m92taHoR8WvAB6vNhzPz/d2sZzY8wp+D6nqEP6MZUL3sE8C7MnNjdXtXm/GLrmVxvs3AZTRP4720u1VNLSK2AL8IXA5sBN4YEW/rblUzi4irgVu6XcdMIuKHgL8AtgI/DvxkRLylu1VNrXrv76K5dMxlwFXV66IvGPhzcy+ws9tFzCQihoEdmfl01fQ0cHEXS5rOK4vzZeYJYGJxvl50BLg9M1+uLix8lt58TAGIiNfQ/GX6oW7X0sbbgL/JzP+oHtcbga93uabpLKOZm+fR/Mt5BdA3S246pTNLEfFWYHVm7o2Ibpczrcw8RXPZaiJikOafy5/rZk3T2EAzSCccAa7sUi0zysx/nrgdET9CM5h+pnsVtfUxmhdN/kC3C2nj9cDLEfElmsu0PERzKfWek5nHqqXev0Uz6P8O+FpXi5oFA38aEXED338U/y1gLc2j0p4xXa2ZuSUihoAHaT7XvXikN59F9roiIn4M+CLw/sz8l27XM5VqVdt/z8yvRMQt3a6njeU0l1J/M3Ac2Ae8g+aS6j0lIn4CeCdwCfB/NA+q3g/c08266jLwp5GZnwE+09pWvYk+CDw2cXQfEYeAqzLz2KIXWZmqVoCIWAN8nuYHtlurP5d7TbvF+XpK9UH4Z4HfysxPdbueGdwIrK9en68B1kTEzsz87S7XNZX/BA5k5lGAiPgczb/ydnezqGn8EvCVzPwvgIjYTXMJGQN/qcnMB4AHJrYj4lz1gWiv2gN8G3hPZvbqGhoHgLsiogGcoLk43/buljS1iPgBmtNiN2bmo92uZyaZ+QsTt6sj/Df3aNgDfAF4MCJeBRwD3kJvTj8CPAV8OCLOA74LXMs0SxH3Ij+0XaKq00a3ApuAJyPiUETs73JZ32e6xfm6W9W03g+sBP60ejwPRcRt3S6q32Xm14EP01wS/RngOeDjXS1qGpn5ZeCTwDdpngixAvjjrhY1Cy6eJkmF8Ahfkgph4EtSIQx8SSqEgS9JhTDwJakQBr4kFcLAl6RCGPiSVIj/B2kl2CPiXnJXAAAAAElFTkSuQmCC\n",
420
- "text/plain": [
421
- "<Figure size 432x288 with 1 Axes>"
422
- ]
423
- },
424
- "metadata": {
425
- "needs_background": "light"
426
- },
427
- "output_type": "display_data"
428
- }
429
- ],
430
- "source": [
431
- "plt.bar(x=x,height=pmf,align='edge',width=-0.43)"
432
- ]
433
- },
434
- {
435
- "cell_type": "markdown",
436
- "metadata": {},
437
- "source": [
438
- "The leftmost peak came from the first sketch, with data centered around 0.0. The smaller, rightmost peak came from our second sketch, which had half as many samples and was centered around 4.0. The KLL sketch captures the shape of the combined distribution."
439
- ]
440
- }
441
- ],
442
- "metadata": {
443
- "kernelspec": {
444
- "display_name": "Python 3",
445
- "language": "python",
446
- "name": "python3"
447
- },
448
- "language_info": {
449
- "codemirror_mode": {
450
- "name": "ipython",
451
- "version": 3
452
- },
453
- "file_extension": ".py",
454
- "mimetype": "text/x-python",
455
- "name": "python",
456
- "nbconvert_exporter": "python",
457
- "pygments_lexer": "ipython3",
458
- "version": "3.7.0"
459
- }
460
- },
461
- "nbformat": 4,
462
- "nbformat_minor": 2
463
- }