datasketches 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (247) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/LICENSE +310 -0
  4. data/NOTICE +11 -0
  5. data/README.md +126 -0
  6. data/ext/datasketches/cpc_wrapper.cpp +50 -0
  7. data/ext/datasketches/ext.cpp +12 -0
  8. data/ext/datasketches/extconf.rb +11 -0
  9. data/ext/datasketches/hll_wrapper.cpp +69 -0
  10. data/lib/datasketches.rb +9 -0
  11. data/lib/datasketches/version.rb +3 -0
  12. data/vendor/datasketches-cpp/CMakeLists.txt +126 -0
  13. data/vendor/datasketches-cpp/LICENSE +311 -0
  14. data/vendor/datasketches-cpp/MANIFEST.in +19 -0
  15. data/vendor/datasketches-cpp/NOTICE +11 -0
  16. data/vendor/datasketches-cpp/README.md +42 -0
  17. data/vendor/datasketches-cpp/common/CMakeLists.txt +45 -0
  18. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +173 -0
  19. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +458 -0
  20. data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +291 -0
  21. data/vendor/datasketches-cpp/common/include/ceiling_power_of_2.hpp +41 -0
  22. data/vendor/datasketches-cpp/common/include/common_defs.hpp +51 -0
  23. data/vendor/datasketches-cpp/common/include/conditional_back_inserter.hpp +68 -0
  24. data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +70 -0
  25. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +114 -0
  26. data/vendor/datasketches-cpp/common/include/inv_pow2_table.hpp +107 -0
  27. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +57 -0
  28. data/vendor/datasketches-cpp/common/include/serde.hpp +196 -0
  29. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +38 -0
  30. data/vendor/datasketches-cpp/common/test/catch.hpp +17618 -0
  31. data/vendor/datasketches-cpp/common/test/catch_runner.cpp +7 -0
  32. data/vendor/datasketches-cpp/common/test/test_allocator.cpp +31 -0
  33. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +108 -0
  34. data/vendor/datasketches-cpp/common/test/test_runner.cpp +29 -0
  35. data/vendor/datasketches-cpp/common/test/test_type.hpp +137 -0
  36. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +74 -0
  37. data/vendor/datasketches-cpp/cpc/include/compression_data.hpp +6022 -0
  38. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +62 -0
  39. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +147 -0
  40. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +742 -0
  41. data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +167 -0
  42. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +311 -0
  43. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +810 -0
  44. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +102 -0
  45. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +346 -0
  46. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +137 -0
  47. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +274 -0
  48. data/vendor/datasketches-cpp/cpc/include/kxp_byte_lookup.hpp +81 -0
  49. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +84 -0
  50. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +266 -0
  51. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +44 -0
  52. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +67 -0
  53. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +381 -0
  54. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +149 -0
  55. data/vendor/datasketches-cpp/fi/CMakeLists.txt +54 -0
  56. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +319 -0
  57. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +484 -0
  58. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +114 -0
  59. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +345 -0
  60. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +44 -0
  61. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +84 -0
  62. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +360 -0
  63. data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
  64. data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
  65. data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
  66. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +47 -0
  67. data/vendor/datasketches-cpp/hll/CMakeLists.txt +92 -0
  68. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +303 -0
  69. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +83 -0
  70. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +811 -0
  71. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +40 -0
  72. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +291 -0
  73. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +59 -0
  74. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +417 -0
  75. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +91 -0
  76. data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +233 -0
  77. data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +43 -0
  78. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +90 -0
  79. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +48 -0
  80. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +335 -0
  81. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +69 -0
  82. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +124 -0
  83. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +55 -0
  84. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +158 -0
  85. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +56 -0
  86. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +706 -0
  87. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +136 -0
  88. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +462 -0
  89. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +149 -0
  90. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +85 -0
  91. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +170 -0
  92. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +287 -0
  93. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +239 -0
  94. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables-internal.hpp +112 -0
  95. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +46 -0
  96. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +56 -0
  97. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +43 -0
  98. data/vendor/datasketches-cpp/hll/include/hll.hpp +669 -0
  99. data/vendor/datasketches-cpp/hll/include/hll.private.hpp +32 -0
  100. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +79 -0
  101. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +51 -0
  102. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +130 -0
  103. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +181 -0
  104. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +93 -0
  105. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +191 -0
  106. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +389 -0
  107. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +313 -0
  108. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +141 -0
  109. data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +44 -0
  110. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +168 -0
  111. data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
  112. data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
  113. data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
  114. data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
  115. data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
  116. data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
  117. data/vendor/datasketches-cpp/kll/CMakeLists.txt +58 -0
  118. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +150 -0
  119. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +319 -0
  120. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +67 -0
  121. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +169 -0
  122. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +559 -0
  123. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +1131 -0
  124. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +44 -0
  125. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +154 -0
  126. data/vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk +0 -0
  127. data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
  128. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +685 -0
  129. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +229 -0
  130. data/vendor/datasketches-cpp/pyproject.toml +17 -0
  131. data/vendor/datasketches-cpp/python/CMakeLists.txt +61 -0
  132. data/vendor/datasketches-cpp/python/README.md +78 -0
  133. data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +345 -0
  134. data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +354 -0
  135. data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +346 -0
  136. data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +463 -0
  137. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +396 -0
  138. data/vendor/datasketches-cpp/python/src/__init__.py +2 -0
  139. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +90 -0
  140. data/vendor/datasketches-cpp/python/src/datasketches.cpp +40 -0
  141. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +123 -0
  142. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +136 -0
  143. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +209 -0
  144. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +162 -0
  145. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +488 -0
  146. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +140 -0
  147. data/vendor/datasketches-cpp/python/tests/__init__.py +0 -0
  148. data/vendor/datasketches-cpp/python/tests/cpc_test.py +64 -0
  149. data/vendor/datasketches-cpp/python/tests/fi_test.py +110 -0
  150. data/vendor/datasketches-cpp/python/tests/hll_test.py +131 -0
  151. data/vendor/datasketches-cpp/python/tests/kll_test.py +119 -0
  152. data/vendor/datasketches-cpp/python/tests/theta_test.py +121 -0
  153. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +148 -0
  154. data/vendor/datasketches-cpp/python/tests/vo_test.py +101 -0
  155. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +48 -0
  156. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +392 -0
  157. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +1752 -0
  158. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +239 -0
  159. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +645 -0
  160. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +43 -0
  161. data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +67 -0
  162. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +509 -0
  163. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +358 -0
  164. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
  165. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
  166. data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
  167. data/vendor/datasketches-cpp/setup.py +94 -0
  168. data/vendor/datasketches-cpp/theta/CMakeLists.txt +57 -0
  169. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +73 -0
  170. data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +83 -0
  171. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +88 -0
  172. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +130 -0
  173. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +533 -0
  174. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +939 -0
  175. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +122 -0
  176. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +109 -0
  177. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +45 -0
  178. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +244 -0
  179. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
  180. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
  181. data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
  182. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +218 -0
  183. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +438 -0
  184. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +97 -0
  185. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  186. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
  187. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +104 -0
  188. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp +52 -0
  189. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp +32 -0
  190. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +52 -0
  191. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection_impl.hpp +31 -0
  192. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +179 -0
  193. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +238 -0
  194. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +81 -0
  195. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +43 -0
  196. data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_sampled_sets.hpp +135 -0
  197. data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_theta_sketched_sets.hpp +135 -0
  198. data/vendor/datasketches-cpp/tuple/include/jaccard_similarity.hpp +172 -0
  199. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +53 -0
  200. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental_impl.hpp +33 -0
  201. data/vendor/datasketches-cpp/tuple/include/theta_comparators.hpp +48 -0
  202. data/vendor/datasketches-cpp/tuple/include/theta_constants.hpp +34 -0
  203. data/vendor/datasketches-cpp/tuple/include/theta_helpers.hpp +54 -0
  204. data/vendor/datasketches-cpp/tuple/include/theta_intersection_base.hpp +59 -0
  205. data/vendor/datasketches-cpp/tuple/include/theta_intersection_base_impl.hpp +121 -0
  206. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +78 -0
  207. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +43 -0
  208. data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base.hpp +54 -0
  209. data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base_impl.hpp +80 -0
  210. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +393 -0
  211. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +481 -0
  212. data/vendor/datasketches-cpp/tuple/include/theta_union_base.hpp +60 -0
  213. data/vendor/datasketches-cpp/tuple/include/theta_union_base_impl.hpp +84 -0
  214. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +88 -0
  215. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +47 -0
  216. data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base.hpp +259 -0
  217. data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base_impl.hpp +389 -0
  218. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +57 -0
  219. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b_impl.hpp +33 -0
  220. data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +104 -0
  221. data/vendor/datasketches-cpp/tuple/include/tuple_intersection_impl.hpp +43 -0
  222. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +496 -0
  223. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +587 -0
  224. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +109 -0
  225. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +47 -0
  226. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +53 -0
  227. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +1 -0
  228. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
  229. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
  230. data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
  231. data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +1 -0
  232. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +298 -0
  233. data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +250 -0
  234. data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
  235. data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
  236. data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
  237. data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +224 -0
  238. data/vendor/datasketches-cpp/tuple/test/theta_jaccard_similarity_test.cpp +144 -0
  239. data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +247 -0
  240. data/vendor/datasketches-cpp/tuple/test/theta_union_experimental_test.cpp +44 -0
  241. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +289 -0
  242. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +235 -0
  243. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +98 -0
  244. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +102 -0
  245. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +249 -0
  246. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +187 -0
  247. metadata +302 -0
@@ -0,0 +1,463 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "## KLL Sketch Examples"
8
+ ]
9
+ },
10
+ {
11
+ "cell_type": "markdown",
12
+ "metadata": {},
13
+ "source": [
14
+ "### Basic Sketch Usage"
15
+ ]
16
+ },
17
+ {
18
+ "cell_type": "code",
19
+ "execution_count": 1,
20
+ "metadata": {},
21
+ "outputs": [],
22
+ "source": [
23
+ "from datasketches import kll_floats_sketch, kll_ints_sketch"
24
+ ]
25
+ },
26
+ {
27
+ "cell_type": "markdown",
28
+ "metadata": {},
29
+ "source": [
30
+ "Options are a `kll_floats_sketch` or `kll_ints_sketch`. We'll use the former so we can draw samples from a Gaussian distribution. We start by creating a sketch with $k=200$, which gives a normalized rank error of about 1.65%, and feeding in 1 million points."
31
+ ]
32
+ },
33
+ {
34
+ "cell_type": "code",
35
+ "execution_count": 2,
36
+ "metadata": {},
37
+ "outputs": [],
38
+ "source": [
39
+ "n = 1000000\n",
40
+ "kll = kll_floats_sketch(200)\n",
41
+ "from numpy.random import randn\n",
42
+ "for i in range(0, n):\n",
43
+ " kll.update(randn()) "
44
+ ]
45
+ },
46
+ {
47
+ "cell_type": "markdown",
48
+ "metadata": {},
49
+ "source": [
50
+ "Since the data is distributed as $\\cal{N}(0,1)$, 0.0 should be near the median rank (0.5)"
51
+ ]
52
+ },
53
+ {
54
+ "cell_type": "code",
55
+ "execution_count": 3,
56
+ "metadata": {},
57
+ "outputs": [
58
+ {
59
+ "data": {
60
+ "text/plain": [
61
+ "0.497608"
62
+ ]
63
+ },
64
+ "execution_count": 3,
65
+ "metadata": {},
66
+ "output_type": "execute_result"
67
+ }
68
+ ],
69
+ "source": [
70
+ "kll.get_rank(0.0)"
71
+ ]
72
+ },
73
+ {
74
+ "cell_type": "markdown",
75
+ "metadata": {},
76
+ "source": [
77
+ "And the median should also be near 0.0"
78
+ ]
79
+ },
80
+ {
81
+ "cell_type": "code",
82
+ "execution_count": 4,
83
+ "metadata": {},
84
+ "outputs": [
85
+ {
86
+ "data": {
87
+ "text/plain": [
88
+ "0.003108405973762274"
89
+ ]
90
+ },
91
+ "execution_count": 4,
92
+ "metadata": {},
93
+ "output_type": "execute_result"
94
+ }
95
+ ],
96
+ "source": [
97
+ "kll.get_quantile(0.5)"
98
+ ]
99
+ },
100
+ {
101
+ "cell_type": "markdown",
102
+ "metadata": {},
103
+ "source": [
104
+ "We track the min and max values as well. They are stored separately from the quantile data so we can always determine the full _empirical_ data range. In this case they should be very roughly symmetric around 0.0. We can query these values explicitly, or implicitly by asking for the values at ranks 0.0 and 1.0."
105
+ ]
106
+ },
107
+ {
108
+ "cell_type": "code",
109
+ "execution_count": 5,
110
+ "metadata": {},
111
+ "outputs": [
112
+ {
113
+ "data": {
114
+ "text/plain": [
115
+ "[-4.6000142097473145, 4.779754638671875]"
116
+ ]
117
+ },
118
+ "execution_count": 5,
119
+ "metadata": {},
120
+ "output_type": "execute_result"
121
+ }
122
+ ],
123
+ "source": [
124
+ "[kll.get_min_value(), kll.get_max_value()]"
125
+ ]
126
+ },
127
+ {
128
+ "cell_type": "code",
129
+ "execution_count": 6,
130
+ "metadata": {},
131
+ "outputs": [
132
+ {
133
+ "data": {
134
+ "text/plain": [
135
+ "[-4.6000142097473145, 4.779754638671875]"
136
+ ]
137
+ },
138
+ "execution_count": 6,
139
+ "metadata": {},
140
+ "output_type": "execute_result"
141
+ }
142
+ ],
143
+ "source": [
144
+ "kll.get_quantiles([0.0, 1.0])"
145
+ ]
146
+ },
147
+ {
148
+ "cell_type": "markdown",
149
+ "metadata": {},
150
+ "source": [
151
+ "And out of curiosity, we can check how many items the sketch has seen and how many it is retaining"
152
+ ]
153
+ },
154
+ {
155
+ "cell_type": "code",
156
+ "execution_count": 7,
157
+ "metadata": {},
158
+ "outputs": [
159
+ {
160
+ "data": {
161
+ "text/plain": [
162
+ "1000000"
163
+ ]
164
+ },
165
+ "execution_count": 7,
166
+ "metadata": {},
167
+ "output_type": "execute_result"
168
+ }
169
+ ],
170
+ "source": [
171
+ "kll.get_n()"
172
+ ]
173
+ },
174
+ {
175
+ "cell_type": "code",
176
+ "execution_count": 8,
177
+ "metadata": {},
178
+ "outputs": [
179
+ {
180
+ "data": {
181
+ "text/plain": [
182
+ "614"
183
+ ]
184
+ },
185
+ "execution_count": 8,
186
+ "metadata": {},
187
+ "output_type": "execute_result"
188
+ }
189
+ ],
190
+ "source": [
191
+ "kll.get_num_retained()"
192
+ ]
193
+ },
194
+ {
195
+ "cell_type": "markdown",
196
+ "metadata": {},
197
+ "source": [
198
+ "Finally, we can serialize the sketch for archiving, and reconstruct it later. Note that the serialized image does _not_ contain information on whether it is a floats or ints sketch."
199
+ ]
200
+ },
201
+ {
202
+ "cell_type": "code",
203
+ "execution_count": 9,
204
+ "metadata": {},
205
+ "outputs": [
206
+ {
207
+ "data": {
208
+ "text/plain": [
209
+ "2536"
210
+ ]
211
+ },
212
+ "execution_count": 9,
213
+ "metadata": {},
214
+ "output_type": "execute_result"
215
+ }
216
+ ],
217
+ "source": [
218
+ "sk_bytes = kll.serialize()\n",
219
+ "len(sk_bytes)"
220
+ ]
221
+ },
222
+ {
223
+ "cell_type": "code",
224
+ "execution_count": 10,
225
+ "metadata": {},
226
+ "outputs": [
227
+ {
228
+ "name": "stdout",
229
+ "output_type": "stream",
230
+ "text": [
231
+ "### KLL sketch summary:\n",
232
+ " K : 200\n",
233
+ " min K : 200\n",
234
+ " M : 8\n",
235
+ " N : 1000000\n",
236
+ " Epsilon : 1.33%\n",
237
+ " Epsilon PMF : 1.65%\n",
238
+ " Empty : false\n",
239
+ " Estimation mode: true\n",
240
+ " Levels : 13\n",
241
+ " Sorted : true\n",
242
+ " Capacity items : 617\n",
243
+ " Retained items : 614\n",
244
+ " Storage bytes : 2536\n",
245
+ " Min value : -4.6\n",
246
+ " Max value : 4.78\n",
247
+ "### End sketch summary\n",
248
+ "\n"
249
+ ]
250
+ }
251
+ ],
252
+ "source": [
253
+ "kll2 = kll_floats_sketch.deserialize(sk_bytes)\n",
254
+ "print(kll2)"
255
+ ]
256
+ },
257
+ {
258
+ "cell_type": "markdown",
259
+ "metadata": {},
260
+ "source": [
261
+ "### Merging Sketches"
262
+ ]
263
+ },
264
+ {
265
+ "cell_type": "markdown",
266
+ "metadata": {},
267
+ "source": [
268
+ "KLL sketches have a `merge()` operation to combine sketches. The resulting sketch will have no worse error bounds than if the full data had been sent to a single sketch."
269
+ ]
270
+ },
271
+ {
272
+ "cell_type": "markdown",
273
+ "metadata": {},
274
+ "source": [
275
+ "Our previous sketch used $\\cal{N}(0,1)$, so now we'll generate a shifted Gaussian distributed as $\\cal{N}(4,1)$. For added variety, we can use half as many points. The next section will generate a plot, so we will defer queries of the merged sketch to that section."
276
+ ]
277
+ },
278
+ {
279
+ "cell_type": "code",
280
+ "execution_count": 12,
281
+ "metadata": {},
282
+ "outputs": [],
283
+ "source": [
284
+ "sk2 = kll_floats_sketch(200)\n",
285
+ "for i in range(0, int(n/2)):\n",
286
+ " sk2.update(4 + randn())"
287
+ ]
288
+ },
289
+ {
290
+ "cell_type": "code",
291
+ "execution_count": 13,
292
+ "metadata": {},
293
+ "outputs": [
294
+ {
295
+ "name": "stdout",
296
+ "output_type": "stream",
297
+ "text": [
298
+ "### KLL sketch summary:\n",
299
+ " K : 200\n",
300
+ " min K : 200\n",
301
+ " M : 8\n",
302
+ " N : 1500000\n",
303
+ " Epsilon : 1.33%\n",
304
+ " Epsilon PMF : 1.65%\n",
305
+ " Empty : false\n",
306
+ " Estimation mode: true\n",
307
+ " Levels : 13\n",
308
+ " Sorted : false\n",
309
+ " Capacity items : 617\n",
310
+ " Retained items : 580\n",
311
+ " Storage bytes : 2400\n",
312
+ " Min value : -4.6\n",
313
+ " Max value : 9.06\n",
314
+ "### End sketch summary\n",
315
+ "\n"
316
+ ]
317
+ }
318
+ ],
319
+ "source": [
320
+ "kll.merge(sk2)\n",
321
+ "print(kll)"
322
+ ]
323
+ },
324
+ {
325
+ "cell_type": "markdown",
326
+ "metadata": {},
327
+ "source": [
328
+ "### Generating Histograms"
329
+ ]
330
+ },
331
+ {
332
+ "cell_type": "markdown",
333
+ "metadata": {},
334
+ "source": [
335
+ "The KLL sketch allows us to compute histograms via the probability mass function (pmf). Since histograms are a typical plot type when visualizing data distributions, we will create such a figure. To instead create a cumulative distribution function (cdf) from the sketch, simply replace the call to `get_pmf()` with `get_cdf()`."
336
+ ]
337
+ },
338
+ {
339
+ "cell_type": "markdown",
340
+ "metadata": {},
341
+ "source": [
342
+ "We want our x-axis to have evenly distributed bins, so the first step is to split the empirical data range\n",
343
+ "into a set of bins."
344
+ ]
345
+ },
346
+ {
347
+ "cell_type": "code",
348
+ "execution_count": 14,
349
+ "metadata": {},
350
+ "outputs": [],
351
+ "source": [
352
+ "xmin = kll.get_min_value()\n",
353
+ "num_splits = 30\n",
354
+ "step = (kll.get_max_value() - xmin) / num_splits\n",
355
+ "splits = [xmin + (i*step) for i in range(0, num_splits)]"
356
+ ]
357
+ },
358
+ {
359
+ "cell_type": "markdown",
360
+ "metadata": {},
361
+ "source": [
362
+ "`get_pmf()` returns the probability mass in the range $(x_{i-1}, x_i]$, for each bin $i$. If we use the minimum value for $x_{i-1}$ this covers the low end, but `get_pmf()` also returns an extra bin with all mass greater than the last-provided split point. As a result, the pmf array is 1 larger than the list of split points. We need to be sure to append a value to the split points for plotting."
363
+ ]
364
+ },
365
+ {
366
+ "cell_type": "code",
367
+ "execution_count": 15,
368
+ "metadata": {},
369
+ "outputs": [],
370
+ "source": [
371
+ "pmf = kll.get_pmf(splits)\n",
372
+ "x = splits # this will hold the x-axis values, so need to append the max value\n",
373
+ "x.append(kll.get_max_value())"
374
+ ]
375
+ },
376
+ {
377
+ "cell_type": "markdown",
378
+ "metadata": {},
379
+ "source": [
380
+ "We need some plotting-related imports and options"
381
+ ]
382
+ },
383
+ {
384
+ "cell_type": "code",
385
+ "execution_count": 16,
386
+ "metadata": {},
387
+ "outputs": [],
388
+ "source": [
389
+ "import seaborn as sns\n",
390
+ "import matplotlib.pyplot as plt\n",
391
+ "%matplotlib inline\n",
392
+ "sns.set(color_codes=True)"
393
+ ]
394
+ },
395
+ {
396
+ "cell_type": "markdown",
397
+ "metadata": {},
398
+ "source": [
399
+ "Using a negative width in the plot gives right-aligned bins, which matches the bin definition noted earlier."
400
+ ]
401
+ },
402
+ {
403
+ "cell_type": "code",
404
+ "execution_count": 17,
405
+ "metadata": {},
406
+ "outputs": [
407
+ {
408
+ "data": {
409
+ "text/plain": [
410
+ "<BarContainer object of 31 artists>"
411
+ ]
412
+ },
413
+ "execution_count": 17,
414
+ "metadata": {},
415
+ "output_type": "execute_result"
416
+ },
417
+ {
418
+ "data": {
419
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAD7CAYAAABpJS8eAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAUgklEQVR4nO3dfYxc11nH8e+u7V3b8Tpt3aliJyQISh8UBHFoCCp2oCgG1IjUlCSNFKM0ahs3qigvSkVVxYHwkgoaBVcWKVQJqgPutrSuqJvWSSs3hhSqNKWKE4mkj2glAjhGmA0Q240d22v+mLvRdLu7c3d3dmdmz/cjWZp7zrnrZ95+e/fMvWcGzp07hyRp6RvsdgGSpMVh4EtSIQx8SSqEgS9JhTDwJakQy7tdwDSGgZ8CjgBnu1yLJPWLZcB64BvAqcmdvRr4PwV8tdtFSFKfugr4h8mNvRr4RwD+539OMD7eW9cJrFu3hrGx490uoxZrXRj9VCv0V73WOj+DgwO8+tXnQZWhk9UK/Ii4CdgBDAE7M/O+acY9CBzMzN3V9ibgI8AKYAx4Z2Y+V+O/PAswPn6u5wIf6MmapmOtC6OfaoX+qtdaO2LKqfC2H9pGxIXA3cBm4DJge0RcOmnMhoh4CLhh0u6fAN6VmRur27vmULgkqQPqnKWzBXg0M1/IzBPAXuD6SWO2AfuAT080RMQwsCMzn66angYunn/JkqS5qDOls4HvnQ86AlzZOiAz7wGIiM0tbaeAPVX7IHAX8Ln5lStJmqs6gT8wRdt43f8gIoaAB6v/60N194PmhyK9qNEY6XYJtVnrwuinWqG/6rXWhVMn8A/TPMVnwnrg+To/PCLWAJ+n+YHt1sw8PZvixsaO99yHIo3GCEePHut2GbVY68Lop1qhv+q11vkZHByY8UC5TuAfAO6KiAZwArgO2F7z/98DfBt4T2b2VnJLUmHafmibmYeBO4CDwCFgNDOfiIj9EXHFdPtFxOXAVmAT8GREHIqI/R2qW5I0S7XOw8/MUWB0Uts1U4y7peX2k0w9/6+CjKxdxcrhmV9mJ0+dWaRqpLL16pW2WiJWDi/n2tv3zTjmoXu3LlI1UtlcLVOSCuERvnrCy6fPtj3F7eSpMxx78aVFqkhaegx89YShFctqTf301klwUn9xSkeSCmHgS1IhDHxJKoSBL0mFMPAlqRAGviQVwsCXpEIY+JJUCANfkgph4EtSIQx8SSqEgS9JhTDwJakQBr4kFcLAl6RCGPiSVAgDX5IKYeBLUiEMfEkqhIEvSYUw8CWpEAa+JBVieZ1BEXETsAMYAnZm5n3TjHsQOJiZu6vti4E9wOuABLZl5vEO1K1CjaxdxcrhmV+2J0+d4diLLy1SRVL/aBv4EXEhcDfwRuAU8LWIOJiZz7SM2QB8DLgaONiy+0eBj2bmpyLiTuBO4AMdrF+FWTm8nGtv3zfjmIfu3cqxRapH6id1pnS2AI9m5guZeQLYC1w/acw2YB/w6YmGiFgB/Gw1HmA3cMN8C5YkzU2dKZ0NwJGW7SPAla0DMvMegIjY3NL8WuDFzDzTst9Fcy9VkjQfdQJ/YIq28QXc7xXr1q2ZzfBF02iMdLuE2vqp1k5a6Pvdb49rP9VrrQunTuAfBq5q2V4PPF9jv6PA2ohYlplnZ7HfK8bGjjM+fm42uyy4RmOEo0f7Y4a4F2rt1htiIe93Lzyus9FP9Vrr/AwODsx4oFxnDv8AcHVENCJiNXAd8Ei7nTLzNPBV4Maq6Wbg4Rr/nyRpAbQN/Mw8DNxB8+ybQ8BoZj4REfsj4oo2u78X2B4Rz9D8K2HHfAuWJM1NrfPwM3MUGJ3Uds0U426ZtP0c8Oa5lydJ6hSvtJWkQhj4klQIA1+SCmHgS1IhDHxJKoSBL0mFMPAlqRAGviQVwsCXpEIY+JJUCANfkgph4EtSIWotniZNVvfLxCX1DgNfc1L3y8Ql
9Q6ndCSpEAa+JBXCwJekQhj4klQIA1+SCmHgS1IhDHxJKoSBL0mFMPAlqRAGviQVwsCXpEIY+JJUCANfkgpRa7XMiLgJ2AEMATsz875J/RuB+4HzgceA2zLzTET8IPBXwFrgf4F3ZOZznStfklRX2yP8iLgQuBvYDFwGbI+ISycN2wO8LzPfAAwAt1btfwh8MjM3Ap+tfo4kqQvqTOlsAR7NzBcy8wSwF7h+ojMiLgFWZebjVdNu4Ibq9jKaR/cA5wEvdaJoSdLs1ZnS2QAcadk+AlzZpv+i6vadwNci4jdoTge9aTbFrVu3ZjbDF02jMdLtEmrrp1o7aaHvd789rv1Ur7UunDqBPzBF23jN/geB7Zm5LyKuA/42In4iM8/VKW5s7Djj47WGLppGY4SjR491u4xaFrLWXn+hL+Rz1E+vAeiveq11fgYHB2Y8UK4zpXMYuKBlez3wfLv+iGgAP5qZ+wAy87PVuNfWK12S1El1Av8AcHVENCJiNXAd8MhEZ3XWzcmI2FQ13Qw8DPx31b4ZoOo/lplHO3kHJEn1tA38zDwM3AEcBA4Bo5n5RETsj4grqmHbgJ0R8SzND2d3VdM2vwrcGxFPAx+m+ctCktQFtc7Dz8xRYHRS2zUtt5/iez/InWh/AvjpedYoSeoAr7SVpEIY+JJUCANfkgph4EtSIQx8SSqEgS9JhTDwJakQBr4kFcLAl6RCGPiSVAgDX5IKYeBLUiEMfEkqhIEvSYUw8CWpEAa+JBXCwJekQtT6xiupn7x8+iyNxkjbcSdPneHYiy8tQkVSbzDwteQMrVjGtbfvazvuoXu3cmwR6pF6hVM6klQIA1+SCmHgS1IhDHxJKoSBL0mFMPAlqRAGviQVotZ5+BFxE7ADGAJ2ZuZ9k/o3AvcD5wOPAbdl5pmIWA88AGwAvgtsy8x/7Vz5kqS62h7hR8SFwN3AZuAyYHtEXDpp2B7gfZn5BmAAuLVq/2vgocy8vLr9J50qXJI0O3WmdLYAj2bmC5l5AtgLXD/RGRGXAKsy8/GqaTdwQ0S8luYviI9V7R+n+VeCJKkL6kzpbACOtGwfAa5s038R8MPAvwE7I+Lnq9u/Ppvi1q1bM5vhi6bOOi29Yra1vnz6LEMrli1QNb1nrs9lr7wG6j5f579qdd88r73y2NbRT7VCvcAfmKJtvEb/cuBy4Pcy87ci4t3Ag8Cb6xY3Nnac8fFzdYcvikZjhKNH+2MFlrnU2miM1F6HZimYy3PZS6+B2TxfvVLzTHrpsW2nF2sdHByY8UC5zpTOYeCClu31wPM1+v8TOJaZX6jaR/nevwwkSYuoTuAfAK6OiEZErAauAx6Z6MzM54CTEbGparoZeDgzvwMcjoi3VO3XAt/sXOmS6ppYMrrdv5G1q7pdqhZQ2ymdzDwcEXcAB2melvlAZj4REfuB383MfwK2AfdHxAjwJLCr2v1twMci4h7gReAdC3EnJM3MJaMFNc/Dz8xRmlMyrW3XtNx+iimmazIzmcWcvSRp4XilrSQVwsCXpEIY+JJUCANfkgph4EtSIQx8SSqEgS9JhTDwJakQBr4kFcLAl6RCGPiSVAgDX5IKYeBLUiEMfEkqhIEvSYUw8CWpELW+AEXS4htZu4qVw75F1Tm+mqQetXJ4eduvJXzo3q2LVI2WAqd0JKkQBr4kFcLAl6RCOIcv6RUvnz5LozEy45iTp85w7MWXFqkidZKBL+kVQyuW1fqg+Ngi1aPOckpHkgph4EtSIQx8SSpErTn8iLgJ2AEMATsz875J/RuB+4HzgceA2zLzTEv/5cDjmTncqcIlSbPT9gg/Ii4E7gY2A5cB2yPi0knD9gDvy8w3AAPArS37rwb+jOYvC0lSl9SZ0tkCPJqZL2TmCWAvcP1EZ0RcAqzKzMerpt3ADS373wvs7Ey5kqS5qhP4G4AjLdtHgIvq9EfEW4HVmbl3nnVKkuapzhz+wBRt4+36I+ICmvP+W+ZSGMC6dWvmuuuCandh
Si/pp1q7Ya6PT+mP60Le/356bPupVqgX+IeBq1q21wPPT+q/YIr+XwbWAY9FBAARcQi4KjNrXbcxNnac8fFzdYYumkZjhKNH++Oyk7nU2m8v4Pmay3O5WK+BXn4uFur+L/X310IbHByY8UC5TuAfAO6KiAZwArgO2D7RmZnPRcTJiNiUmf8I3Aw8nJkPAA9MjIuIc5m5cY73Q5I0T23n8DPzMHAHcBA4BIxm5hMRsT8irqiGbQN2RsSzwHnAroUqWJI0N7XOw8/MUWB0Uts1LbefAq5s8zOmmuuXJC0Sr7SVpEIY+JJUCANfkgrhevgqVp0v+4DOf+HHyNpVrBye+a138tSZGfuluTDwVaw6X/YBnf/Cj5XDy2t9yYjUaQZ+IeocVUpa2kyAQtQ5qgSPLKWlzA9tJakQBr4kFcLAl6RCGPiSVAgDX5IK4Vk6kmat7sVjnbxgTfNn4EuatboXj/XW14PIKR1JKoSBL0mFMPAlqRDO4UttTLWq5uTtidUtXQVTvczAl9qos6rmxBpEroKpXuaUjiQVwsCXpEIY+JJUCANfkgph4EtSIQx8SSqEgS9Jhah1Hn5E3ATsAIaAnZl536T+jcD9wPnAY8BtmXkmIjYBHwFWAGPAOzPzuQ7WL0mqqe0RfkRcCNwNbAYuA7ZHxKWThu0B3peZbwAGgFur9k8A78rMjdXtXZ0qXJI0O3WmdLYAj2bmC5l5AtgLXD/RGRGXAKsy8/GqaTdwQ0QMAzsy8+mq/Wng4o5VLkmalTpTOhuAIy3bR4Ar2/RflJmnaB75ExGDwF3A5+ZTrCRp7uoE/sAUbeN1+yNiCHiw+r8+NJvi1q1bM5vhi2bywlm9rJ9q1dIzl9dfP71m+6lWqBf4h4GrWrbXA89P6r9gqv6IWAN8nuYHtlsz8/RsihsbO874+LnZ7LLgGo0Rjh7tj+/xaa21316YWhpm+17p1/dXrxgcHJjxQLnOHP4B4OqIaETEauA64JGJzuqsm5PVGTkANwMPV7f3AN8G3l5N8UiSuqRt4GfmYeAO4CBwCBjNzCciYn9EXFEN2wbsjIhngfOAXRFxObAV2AQ8GRGHImL/gtwLSVJbtc7Dz8xRYHRS2zUtt5/iez/IBXiSqef3JUld4JW2klQIA1+SCmHgS1IhDHxJKoRfYi5pwYysXcXK4Zlj5uSpMxx78aVFqqhsBr6kBbNyeDnX3r5vxjEP3buV3rp8aeky8JeAmY6ivMJW0gQDfwmoexQlqWx+aCtJhTDwJakQBr4kFcLAl6RCGPiSVAgDX5IKYeBLUiEMfEkqhBdeSeq61qvFp7s63DV35s/Al9R1rrmzOJzSkaRCGPiSVAgDX5IKYeBLUiEMfEkqhGfp9LC6Xw8nSXUY+D3MLzaR1ElO6UhSIQx8SSpErSmdiLgJ2AEMATsz875J/RuB+4HzgceA2zLzTERcDOwBXgcksC0zj3ewfklSTW0DPyIuBO4G3gicAr4WEQcz85mWYXuAd2fm4xHxl8CtwJ8DHwU+mpmfiog7gTuBD3T6TvQbP4yVZu/l02enXWen1amXzzI8tKztuBLX5qlzhL8FeDQzXwCIiL3A9cAfVNuXAKsy8/Fq/G7g9yPiAeBngV9paf976gX+MoDBwYFad2KxzbeulcPLedcffXnGMX+54xcBeN2rV9X6mXXGdfJn9fq4Xq6t0+N6ubZOjhtasazt+waa7526407M873caxnVUs+Uv/EGzp07N+MPiIgPAudl5o5q+93AlZm5vdp+E3BPZm6utl8P7Ad+DvhGZl5UtS8HvpuZQzXq3gx8tcY4SdL3uwr4h8mNdY7wp/oVNl6jv91+M/kGzYKPAGdr7iNJpVsGrKeZod+nTuAfphm+E9YDz0/qv2CK/qPA2ohYlplnp9hvJqeY4reTJKmt70zXUee0zAPA1RHRiIjVwHXAIxOdmfkccDIiNlVNNwMPZ+ZpmtMyN7a2z6F4SVIHtA38zDwM3AEcBA4Bo5n5
RETsj4grqmHbgJ0R8SxwHrCran8vsD0inqH5V8KOTt8BSVI9bT+0lSQtDV5pK0mFMPAlqRAGviQVwsCXpEK4Hv4cRcTlwOOZOdztWqZTnSr7EWAFMAa8szqNtqe0W5yvl0TE7wFvrza/mJm/08166oiIe4BGZt7S7VqmExHXAnfRPMvvS5n5m92taHoR8WvAB6vNhzPz/d2sZzY8wp+D6nqEP6MZUL3sE8C7MnNjdXtXm/GLrmVxvs3AZTRP4720u1VNLSK2AL8IXA5sBN4YEW/rblUzi4irgVu6XcdMIuKHgL8AtgI/DvxkRLylu1VNrXrv76K5dMxlwFXV66IvGPhzcy+ws9tFzCQihoEdmfl01fQ0cHEXS5rOK4vzZeYJYGJxvl50BLg9M1+uLix8lt58TAGIiNfQ/GX6oW7X0sbbgL/JzP+oHtcbga93uabpLKOZm+fR/Mt5BdA3S246pTNLEfFWYHVm7o2Ibpczrcw8RXPZaiJikOafy5/rZk3T2EAzSCccAa7sUi0zysx/nrgdET9CM5h+pnsVtfUxmhdN/kC3C2nj9cDLEfElmsu0PERzKfWek5nHqqXev0Uz6P8O+FpXi5oFA38aEXED338U/y1gLc2j0p4xXa2ZuSUihoAHaT7XvXikN59F9roiIn4M+CLw/sz8l27XM5VqVdt/z8yvRMQt3a6njeU0l1J/M3Ac2Ae8g+aS6j0lIn4CeCdwCfB/NA+q3g/c08266jLwp5GZnwE+09pWvYk+CDw2cXQfEYeAqzLz2KIXWZmqVoCIWAN8nuYHtlurP5d7TbvF+XpK9UH4Z4HfysxPdbueGdwIrK9en68B1kTEzsz87S7XNZX/BA5k5lGAiPgczb/ydnezqGn8EvCVzPwvgIjYTXMJGQN/qcnMB4AHJrYj4lz1gWiv2gN8G3hPZvbqGhoHgLsiogGcoLk43/buljS1iPgBmtNiN2bmo92uZyaZ+QsTt6sj/Df3aNgDfAF4MCJeBRwD3kJvTj8CPAV8OCLOA74LXMs0SxH3Ij+0XaKq00a3ApuAJyPiUETs73JZ32e6xfm6W9W03g+sBP60ejwPRcRt3S6q32Xm14EP01wS/RngOeDjXS1qGpn5ZeCTwDdpngixAvjjrhY1Cy6eJkmF8Ahfkgph4EtSIQx8SSqEgS9JhTDwJakQBr4kFcLAl6RCGPiSVIj/B2kl2CPiXnJXAAAAAElFTkSuQmCC\n",
420
+ "text/plain": [
421
+ "<Figure size 432x288 with 1 Axes>"
422
+ ]
423
+ },
424
+ "metadata": {
425
+ "needs_background": "light"
426
+ },
427
+ "output_type": "display_data"
428
+ }
429
+ ],
430
+ "source": [
431
+ "plt.bar(x=x,height=pmf,align='edge',width=-0.43)"
432
+ ]
433
+ },
434
+ {
435
+ "cell_type": "markdown",
436
+ "metadata": {},
437
+ "source": [
438
+ "The leftmost peak came from the first sketch, with data centered around 0.0. The smaller, rightmost peak came from our second sketch, which had half as many samples and was centered around 4.0. The KLL sketch captures the shape of the combined distribution."
439
+ ]
440
+ }
441
+ ],
442
+ "metadata": {
443
+ "kernelspec": {
444
+ "display_name": "Python 3",
445
+ "language": "python",
446
+ "name": "python3"
447
+ },
448
+ "language_info": {
449
+ "codemirror_mode": {
450
+ "name": "ipython",
451
+ "version": 3
452
+ },
453
+ "file_extension": ".py",
454
+ "mimetype": "text/x-python",
455
+ "name": "python",
456
+ "nbconvert_exporter": "python",
457
+ "pygments_lexer": "ipython3",
458
+ "version": "3.7.0"
459
+ }
460
+ },
461
+ "nbformat": 4,
462
+ "nbformat_minor": 2
463
+ }