dask-array 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144) hide show
  1. dask_array/__init__.py +228 -0
  2. dask_array/_backends.py +76 -0
  3. dask_array/_backends_array.py +99 -0
  4. dask_array/_blockwise.py +1410 -0
  5. dask_array/_broadcast.py +272 -0
  6. dask_array/_chunk.py +445 -0
  7. dask_array/_chunk_types.py +54 -0
  8. dask_array/_collection.py +1644 -0
  9. dask_array/_concatenate.py +331 -0
  10. dask_array/_core_utils.py +1365 -0
  11. dask_array/_dispatch.py +141 -0
  12. dask_array/_einsum.py +277 -0
  13. dask_array/_expr.py +544 -0
  14. dask_array/_expr_flow.py +586 -0
  15. dask_array/_gufunc.py +805 -0
  16. dask_array/_histogram.py +617 -0
  17. dask_array/_map_blocks.py +652 -0
  18. dask_array/_new_collection.py +10 -0
  19. dask_array/_numpy_compat.py +135 -0
  20. dask_array/_overlap.py +1159 -0
  21. dask_array/_rechunk.py +1050 -0
  22. dask_array/_reshape.py +710 -0
  23. dask_array/_routines.py +102 -0
  24. dask_array/_shuffle.py +448 -0
  25. dask_array/_stack.py +264 -0
  26. dask_array/_svg.py +291 -0
  27. dask_array/_templates.py +29 -0
  28. dask_array/_test_utils.py +257 -0
  29. dask_array/_ufunc.py +385 -0
  30. dask_array/_utils.py +349 -0
  31. dask_array/_visualize.py +223 -0
  32. dask_array/_xarray.py +337 -0
  33. dask_array/core/__init__.py +34 -0
  34. dask_array/core/_blockwise_funcs.py +312 -0
  35. dask_array/core/_conversion.py +422 -0
  36. dask_array/core/_from_graph.py +97 -0
  37. dask_array/creation/__init__.py +71 -0
  38. dask_array/creation/_arange.py +121 -0
  39. dask_array/creation/_diag.py +116 -0
  40. dask_array/creation/_diagonal.py +241 -0
  41. dask_array/creation/_eye.py +103 -0
  42. dask_array/creation/_linspace.py +102 -0
  43. dask_array/creation/_mesh.py +134 -0
  44. dask_array/creation/_ones_zeros.py +454 -0
  45. dask_array/creation/_pad.py +270 -0
  46. dask_array/creation/_repeat.py +55 -0
  47. dask_array/creation/_tile.py +36 -0
  48. dask_array/creation/_tri.py +28 -0
  49. dask_array/creation/_utils.py +296 -0
  50. dask_array/fft.py +320 -0
  51. dask_array/io/__init__.py +39 -0
  52. dask_array/io/_base.py +10 -0
  53. dask_array/io/_from_array.py +257 -0
  54. dask_array/io/_from_delayed.py +95 -0
  55. dask_array/io/_from_graph.py +54 -0
  56. dask_array/io/_from_npy_stack.py +67 -0
  57. dask_array/io/_store.py +336 -0
  58. dask_array/io/_tiledb.py +159 -0
  59. dask_array/io/_to_npy_stack.py +65 -0
  60. dask_array/io/_zarr.py +449 -0
  61. dask_array/linalg/__init__.py +39 -0
  62. dask_array/linalg/_cholesky.py +234 -0
  63. dask_array/linalg/_lu.py +300 -0
  64. dask_array/linalg/_norm.py +94 -0
  65. dask_array/linalg/_qr.py +601 -0
  66. dask_array/linalg/_solve.py +349 -0
  67. dask_array/linalg/_svd.py +394 -0
  68. dask_array/linalg/_tensordot.py +334 -0
  69. dask_array/linalg/_utils.py +74 -0
  70. dask_array/manipulation/__init__.py +45 -0
  71. dask_array/manipulation/_expand.py +321 -0
  72. dask_array/manipulation/_flip.py +92 -0
  73. dask_array/manipulation/_roll.py +78 -0
  74. dask_array/manipulation/_transpose.py +309 -0
  75. dask_array/random/__init__.py +125 -0
  76. dask_array/random/_choice.py +181 -0
  77. dask_array/random/_expr.py +256 -0
  78. dask_array/random/_generator.py +441 -0
  79. dask_array/random/_random_state.py +259 -0
  80. dask_array/random/_utils.py +84 -0
  81. dask_array/reductions/__init__.py +84 -0
  82. dask_array/reductions/_arg_reduction.py +130 -0
  83. dask_array/reductions/_common.py +1082 -0
  84. dask_array/reductions/_cumulative.py +522 -0
  85. dask_array/reductions/_percentile.py +261 -0
  86. dask_array/reductions/_reduction.py +725 -0
  87. dask_array/reductions/_trace.py +56 -0
  88. dask_array/routines/__init__.py +133 -0
  89. dask_array/routines/_apply.py +84 -0
  90. dask_array/routines/_bincount.py +112 -0
  91. dask_array/routines/_broadcast.py +111 -0
  92. dask_array/routines/_coarsen.py +115 -0
  93. dask_array/routines/_diff.py +79 -0
  94. dask_array/routines/_gradient.py +158 -0
  95. dask_array/routines/_indexing.py +65 -0
  96. dask_array/routines/_insert_delete.py +132 -0
  97. dask_array/routines/_misc.py +122 -0
  98. dask_array/routines/_nonzero.py +72 -0
  99. dask_array/routines/_search.py +123 -0
  100. dask_array/routines/_select.py +113 -0
  101. dask_array/routines/_statistics.py +171 -0
  102. dask_array/routines/_topk.py +82 -0
  103. dask_array/routines/_triangular.py +74 -0
  104. dask_array/routines/_unique.py +232 -0
  105. dask_array/routines/_where.py +62 -0
  106. dask_array/slicing/__init__.py +67 -0
  107. dask_array/slicing/_basic.py +550 -0
  108. dask_array/slicing/_blocks.py +138 -0
  109. dask_array/slicing/_bool_index.py +145 -0
  110. dask_array/slicing/_setitem.py +329 -0
  111. dask_array/slicing/_squeeze.py +101 -0
  112. dask_array/slicing/_utils.py +1133 -0
  113. dask_array/slicing/_vindex.py +282 -0
  114. dask_array/stacking/__init__.py +15 -0
  115. dask_array/stacking/_block.py +83 -0
  116. dask_array/stacking/_simple.py +58 -0
  117. dask_array/templates/array.html.j2 +48 -0
  118. dask_array/tests/__init__.py +0 -0
  119. dask_array/tests/conftest.py +22 -0
  120. dask_array/tests/test_api.py +40 -0
  121. dask_array/tests/test_binary_op_chunks.py +107 -0
  122. dask_array/tests/test_coarse_slice_through_blockwise.py +362 -0
  123. dask_array/tests/test_collection.py +799 -0
  124. dask_array/tests/test_creation.py +1102 -0
  125. dask_array/tests/test_expr_flow.py +143 -0
  126. dask_array/tests/test_linalg.py +1130 -0
  127. dask_array/tests/test_map_blocks_multi_output.py +104 -0
  128. dask_array/tests/test_rechunk_pushdown.py +214 -0
  129. dask_array/tests/test_reductions.py +1091 -0
  130. dask_array/tests/test_routines.py +2853 -0
  131. dask_array/tests/test_shuffle_chunks.py +67 -0
  132. dask_array/tests/test_slice_pushdown.py +968 -0
  133. dask_array/tests/test_slice_through_blockwise.py +678 -0
  134. dask_array/tests/test_slice_through_overlap.py +366 -0
  135. dask_array/tests/test_slice_through_reshape.py +272 -0
  136. dask_array/tests/test_slicing.py +839 -0
  137. dask_array/tests/test_transpose_slice_pushdown.py +208 -0
  138. dask_array/tests/test_visualize.py +94 -0
  139. dask_array/tests/test_xarray.py +193 -0
  140. dask_array-0.1.0.dist-info/METADATA +48 -0
  141. dask_array-0.1.0.dist-info/RECORD +144 -0
  142. dask_array-0.1.0.dist-info/WHEEL +4 -0
  143. dask_array-0.1.0.dist-info/entry_points.txt +2 -0
  144. dask_array-0.1.0.dist-info/licenses/LICENSE +29 -0
@@ -0,0 +1,261 @@
1
+ """Percentile functions for dask arrays."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import warnings
6
+ from collections.abc import Iterator
7
+ from functools import wraps
8
+ from numbers import Number
9
+
10
+ import numpy as np
11
+ from tlz import merge
12
+
13
+ from dask_array._dispatch import empty_lookup, percentile_lookup
14
+ from dask.base import tokenize
15
+ from dask.utils import derived_from
16
+
17
+ from dask_array.core import from_graph
18
+
19
+
20
+ @wraps(np.percentile)
21
+ def _percentile(a, q, method="linear"):
22
+ n = len(a)
23
+ if not len(a):
24
+ return None, n
25
+ if isinstance(q, Iterator):
26
+ q = list(q)
27
+ if a.dtype.name == "category":
28
+ result = np.percentile(a.cat.codes, q, method=method)
29
+ import pandas as pd
30
+
31
+ return pd.Categorical.from_codes(result, a.dtype.categories, a.dtype.ordered), n
32
+ if type(a.dtype).__name__ == "DatetimeTZDtype":
33
+ import pandas as pd
34
+
35
+ if isinstance(a, (pd.Series, pd.Index)):
36
+ a = a.values
37
+
38
+ if np.issubdtype(a.dtype, np.datetime64):
39
+ values = a
40
+ if type(a).__name__ in ("Series", "Index"):
41
+ a2 = values.astype("i8")
42
+ else:
43
+ a2 = values.view("i8")
44
+ result = np.percentile(a2, q, method=method).astype(values.dtype)
45
+ if q[0] == 0:
46
+ result[0] = min(result[0], values.min())
47
+ return result, n
48
+ if not np.issubdtype(a.dtype, np.number):
49
+ method = "nearest"
50
+ return np.percentile(a, q, method=method), n
51
+
52
+
53
def _tdigest_chunk(a):
    """Build a ``crick.TDigest`` and update it with the contents of ``a``."""
    # ``crick`` is imported lazily so it is only required when this runs.
    from crick import TDigest

    digest = TDigest()
    digest.update(a)
    return digest
60
+
61
+
62
def _percentiles_from_tdigest(qs, digests):
    """Merge ``digests`` into one ``TDigest`` and evaluate it at ``qs``.

    ``qs`` is given on the 0-100 scale and is divided by 100 before being
    passed to ``TDigest.quantile``; the result is returned as a numpy array.
    """
    from crick import TDigest

    combined = TDigest()
    combined.merge(*digests)

    fractions = qs / 100.0
    return np.array(combined.quantile(fractions))
69
+
70
+
71
def merge_percentiles(finalq, qs, vals, method="lower", Ns=None, raise_on_nan=True):
    """Combine several percentile calculations of different data.

    Parameters
    ----------
    finalq : array_like
        Percentiles at which to evaluate the combined data.
    qs : sequence of array_like
        One entry per input: the percentiles at which that input was
        evaluated.
    vals : sequence
        One entry per input: the percentile values matching ``qs``.  When
        ``Ns`` is None each entry is instead a ``(values, count)`` pair.
    method : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}, optional
        How to pick a value when a requested percentile falls between two
        merged points.  Forced to ``'nearest'`` for non-numeric values.
    Ns : sequence of int, optional
        Number of elements behind each input.  Inputs with a falsy count are
        ignored.
    raise_on_nan : bool, optional
        When every input is empty, raise ``ValueError`` if True, otherwise
        return an all-NaN array with one entry per requested percentile.

    Returns
    -------
    The merged percentile values, one per entry of ``finalq``.

    Raises
    ------
    ValueError
        If ``qs``, ``vals`` and ``Ns`` differ in length, if no input is
        non-empty and ``raise_on_nan`` is True, or if ``method`` is unknown.
    """
    from dask_array._utils import array_safe

    if isinstance(finalq, Iterator):
        finalq = list(finalq)
    finalq = array_safe(finalq, like=finalq)
    qs = [list(q) for q in qs]
    vals = list(vals)
    if Ns is None:
        vals, Ns = zip(*vals)
    Ns = list(Ns)

    # Validate before zipping: ``zip`` silently truncates to the shortest
    # input, which previously made this check unreachable.
    if len(vals) != len(qs) or len(Ns) != len(qs):
        raise ValueError("qs, vals, and Ns parameters must be the same length")

    # Drop inputs that contributed no elements.
    non_empty = list(zip(*((q, val, N) for q, val, N in zip(qs, vals, Ns) if N)))
    if not non_empty:
        if raise_on_nan:
            raise ValueError("No non-trivial arrays found")
        # Size the fallback like the regular result (one value per requested
        # percentile) instead of assuming ``qs`` carries two padding entries.
        return np.full(len(finalq), np.nan)
    qs, vals, Ns = non_empty

    if vals[0].dtype.name == "category":
        # Merge on the integer codes, then rebuild the categorical.
        result = merge_percentiles(finalq, qs, [v.codes for v in vals], method, Ns, raise_on_nan)
        import pandas as pd

        return pd.Categorical.from_codes(result, vals[0].categories, vals[0].ordered)
    if not np.issubdtype(vals[0].dtype, np.number):
        method = "nearest"

    # Weight every percentile value by the share of its input it spans
    # (difference between consecutive percentiles) times the input's size.
    total_len = sum(len(q) for q in qs)
    make_empty = empty_lookup.dispatch(type(finalq))
    counts = make_empty(total_len, dtype=finalq.dtype)
    start = 0
    for q, N in zip(qs, Ns):
        length = len(q)
        count = make_empty(length, dtype=finalq.dtype)
        count[1:] = np.diff(array_safe(q, like=q[0]))
        count[0] = q[0]
        count *= N
        counts[start : start + length] = count
        start += length

    # Sort all values together, carrying their weights along.
    combined_vals = np.concatenate(vals)
    combined_counts = array_safe(counts, like=combined_vals)
    sort_order = np.argsort(combined_vals)
    combined_vals = np.take(combined_vals, sort_order)
    combined_counts = np.take(combined_counts, sort_order)

    # Cumulative weight plays the role of the percentile axis of the merge.
    combined_q = np.cumsum(combined_counts)

    finalq = array_safe(finalq, like=combined_vals)
    desired_q = finalq * sum(Ns)

    if method == "linear":
        rv = np.interp(desired_q, combined_q, combined_vals)
    else:
        left = np.searchsorted(combined_q, desired_q, side="left")
        right = np.searchsorted(combined_q, desired_q, side="right") - 1
        # Clamp so the largest requested percentile stays in bounds.
        np.minimum(left, len(combined_vals) - 1, out=left)
        lower = np.minimum(left, right)
        upper = np.maximum(left, right)
        if method == "lower":
            rv = combined_vals[lower]
        elif method == "higher":
            rv = combined_vals[upper]
        elif method == "midpoint":
            rv = 0.5 * (combined_vals[lower] + combined_vals[upper])
        elif method == "nearest":
            lower_residual = np.abs(combined_q[lower] - desired_q)
            upper_residual = np.abs(combined_q[upper] - desired_q)
            mask = lower_residual > upper_residual
            index = lower
            index[mask] = upper[mask]
            rv = combined_vals[index]
        else:
            raise ValueError("interpolation method can only be 'linear', 'lower', 'higher', 'midpoint', or 'nearest'")
    return rv
149
+
150
+
151
def percentile(a, q, method="linear", internal_method="default", **kwargs):
    """Approximate percentile of 1-D array

    Arrays with more than one dimension are delegated to ``quantile`` with
    ``q`` rescaled from the 0-100 range to 0-1; ``internal_method`` is not
    used on that path.

    Parameters
    ----------
    a : Array
    q : array_like of float
        Percentile or sequence of percentiles to compute, which must be between
        0 and 100 inclusive.
    method : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}, optional
        The interpolation method to use when the desired percentile lies
        between two data points.  Passing one of the ``internal_method``
        names here is deprecated: it emits a ``FutureWarning``, is treated as
        ``internal_method`` and the interpolation falls back to ``'linear'``
        unless the legacy ``interpolation=`` keyword is also given.
    internal_method : {'default', 'dask', 'tdigest'}, optional
        What internal method to use. By default will use dask's internal custom
        algorithm (``'dask'``).  ``'tdigest'`` is only used when ``method`` is
        ``'linear'`` and the dtype is floating or integer; otherwise the
        per-chunk percentile merge is used.
    **kwargs
        For 1-D input only the deprecated ``interpolation=`` alias of
        ``method`` is accepted.  For higher-dimensional input the keywords are
        forwarded to ``quantile``.

    Returns
    -------
    Array
        For 1-D input, an array with one entry per requested percentile, or a
        0-d array when ``q`` is a single number.

    Raises
    ------
    TypeError
        If unexpected keyword arguments are passed for 1-D input.
    ValueError
        If ``internal_method`` is not one of the allowed values.
    NotImplementedError
        If ``a`` has zero dimensions.
    """
    from dask_array._utils import array_safe, meta_from_array
    from dask_array.reductions import quantile

    if a.ndim > 1:
        q = np.true_divide(q, a.dtype.type(100) if a.dtype.kind == "f" else 100)
        return quantile(a, q, method=method, **kwargs)
    if a.ndim != 1:
        raise NotImplementedError("support for arrays of ndim 0 is not implemented.")

    allowed_internal_methods = {"default", "dask", "tdigest"}

    if method in allowed_internal_methods:
        warnings.warn(
            "The `method=` argument was renamed to `internal_method=`",
            FutureWarning,
        )
        internal_method = method
        # ``method`` held an internal-method name, not an interpolation name.
        # Restore the default so the t-digest branch stays reachable and
        # ``merge_percentiles`` is not handed an unknown interpolation.
        method = "linear"

    if "interpolation" in kwargs:
        warnings.warn(
            "The `interpolation=` argument to percentile was renamed to `method= ` ",
            FutureWarning,
        )
        method = kwargs.pop("interpolation")

    if kwargs:
        raise TypeError(f"percentile() got an unexpected keyword argument {kwargs.keys()}")

    q_is_number = False
    if isinstance(q, Number):
        q_is_number = True
        q = [q]
    q = array_safe(q, like=meta_from_array(a))
    token = tokenize(a, q, method)

    dtype = a.dtype
    if np.issubdtype(dtype, np.integer):
        # Integer input yields the dtype that true division produces.
        dtype = (array_safe([], dtype=dtype, like=meta_from_array(a)) / 0.5).dtype
    meta = meta_from_array(a, dtype=dtype)

    if internal_method not in allowed_internal_methods:
        raise ValueError(f"`internal_method=` must be one of {allowed_internal_methods}")

    if (
        internal_method == "tdigest"
        and method == "linear"
        and (np.issubdtype(dtype, np.floating) or np.issubdtype(dtype, np.integer))
    ):
        from dask.utils import import_required

        import_required("crick", "crick is a required dependency for using the t-digest method.")

        # One t-digest per chunk, merged and queried by a single final task.
        name = "percentile_tdigest_chunk-" + token
        dsk = {(name, i): (_tdigest_chunk, key) for i, key in enumerate(a.__dask_keys__())}

        name2 = "percentile_tdigest-" + token
        dsk2 = {(name2, 0): (_percentiles_from_tdigest, q, sorted(dsk))}

    else:
        # Every chunk is evaluated at the requested percentiles padded with 0
        # and 100 before the per-chunk results are merged.
        zero = empty_lookup.dispatch(type(q))(1, dtype=q.dtype)
        zero[:] = 0

        hundred = empty_lookup.dispatch(type(q))(1, dtype=q.dtype)
        hundred[:] = 100

        calc_q = np.concatenate((zero, q, hundred))
        name = "percentile_chunk-" + token
        dsk = {(name, i): (percentile_lookup, key, calc_q, method) for i, key in enumerate(a.__dask_keys__())}

        name2 = "percentile-" + token
        dsk2 = {
            (name2, 0): (
                merge_percentiles,
                q,
                [calc_q] * len(a.chunks[0]),
                sorted(dsk),
                method,
            )
        }
    dsk = merge(dsk, dsk2)
    # Merge the dependency graph with our new tasks
    full_dsk = dict(a.__dask_graph__())
    full_dsk.update(dsk)
    arr = from_graph(full_dsk, meta, ((len(q),),), [(name2, 0)], name2)
    return arr.reshape(()) if q_is_number else arr
253
+
254
+
255
@derived_from(np)
def nanpercentile(a, q, **kwargs):
    # NOTE(review): ``derived_from(np)`` presumably supplies the docstring
    # from numpy, so the contract is described in comments only - confirm.
    #
    # Rescale ``q`` from the 0-100 range to 0-1 and defer to ``nanquantile``.
    from dask_array.reductions import nanquantile

    # Floating-point inputs divide by 100 expressed in their own dtype.
    if a.dtype.kind == "f":
        scale = a.dtype.type(100)
    else:
        scale = 100
    fractions = np.true_divide(q, scale)

    return nanquantile(a, fractions, **kwargs)