dask-array 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dask_array/__init__.py +228 -0
- dask_array/_backends.py +76 -0
- dask_array/_backends_array.py +99 -0
- dask_array/_blockwise.py +1410 -0
- dask_array/_broadcast.py +272 -0
- dask_array/_chunk.py +445 -0
- dask_array/_chunk_types.py +54 -0
- dask_array/_collection.py +1644 -0
- dask_array/_concatenate.py +331 -0
- dask_array/_core_utils.py +1365 -0
- dask_array/_dispatch.py +141 -0
- dask_array/_einsum.py +277 -0
- dask_array/_expr.py +544 -0
- dask_array/_expr_flow.py +586 -0
- dask_array/_gufunc.py +805 -0
- dask_array/_histogram.py +617 -0
- dask_array/_map_blocks.py +652 -0
- dask_array/_new_collection.py +10 -0
- dask_array/_numpy_compat.py +135 -0
- dask_array/_overlap.py +1159 -0
- dask_array/_rechunk.py +1050 -0
- dask_array/_reshape.py +710 -0
- dask_array/_routines.py +102 -0
- dask_array/_shuffle.py +448 -0
- dask_array/_stack.py +264 -0
- dask_array/_svg.py +291 -0
- dask_array/_templates.py +29 -0
- dask_array/_test_utils.py +257 -0
- dask_array/_ufunc.py +385 -0
- dask_array/_utils.py +349 -0
- dask_array/_visualize.py +223 -0
- dask_array/_xarray.py +337 -0
- dask_array/core/__init__.py +34 -0
- dask_array/core/_blockwise_funcs.py +312 -0
- dask_array/core/_conversion.py +422 -0
- dask_array/core/_from_graph.py +97 -0
- dask_array/creation/__init__.py +71 -0
- dask_array/creation/_arange.py +121 -0
- dask_array/creation/_diag.py +116 -0
- dask_array/creation/_diagonal.py +241 -0
- dask_array/creation/_eye.py +103 -0
- dask_array/creation/_linspace.py +102 -0
- dask_array/creation/_mesh.py +134 -0
- dask_array/creation/_ones_zeros.py +454 -0
- dask_array/creation/_pad.py +270 -0
- dask_array/creation/_repeat.py +55 -0
- dask_array/creation/_tile.py +36 -0
- dask_array/creation/_tri.py +28 -0
- dask_array/creation/_utils.py +296 -0
- dask_array/fft.py +320 -0
- dask_array/io/__init__.py +39 -0
- dask_array/io/_base.py +10 -0
- dask_array/io/_from_array.py +257 -0
- dask_array/io/_from_delayed.py +95 -0
- dask_array/io/_from_graph.py +54 -0
- dask_array/io/_from_npy_stack.py +67 -0
- dask_array/io/_store.py +336 -0
- dask_array/io/_tiledb.py +159 -0
- dask_array/io/_to_npy_stack.py +65 -0
- dask_array/io/_zarr.py +449 -0
- dask_array/linalg/__init__.py +39 -0
- dask_array/linalg/_cholesky.py +234 -0
- dask_array/linalg/_lu.py +300 -0
- dask_array/linalg/_norm.py +94 -0
- dask_array/linalg/_qr.py +601 -0
- dask_array/linalg/_solve.py +349 -0
- dask_array/linalg/_svd.py +394 -0
- dask_array/linalg/_tensordot.py +334 -0
- dask_array/linalg/_utils.py +74 -0
- dask_array/manipulation/__init__.py +45 -0
- dask_array/manipulation/_expand.py +321 -0
- dask_array/manipulation/_flip.py +92 -0
- dask_array/manipulation/_roll.py +78 -0
- dask_array/manipulation/_transpose.py +309 -0
- dask_array/random/__init__.py +125 -0
- dask_array/random/_choice.py +181 -0
- dask_array/random/_expr.py +256 -0
- dask_array/random/_generator.py +441 -0
- dask_array/random/_random_state.py +259 -0
- dask_array/random/_utils.py +84 -0
- dask_array/reductions/__init__.py +84 -0
- dask_array/reductions/_arg_reduction.py +130 -0
- dask_array/reductions/_common.py +1082 -0
- dask_array/reductions/_cumulative.py +522 -0
- dask_array/reductions/_percentile.py +261 -0
- dask_array/reductions/_reduction.py +725 -0
- dask_array/reductions/_trace.py +56 -0
- dask_array/routines/__init__.py +133 -0
- dask_array/routines/_apply.py +84 -0
- dask_array/routines/_bincount.py +112 -0
- dask_array/routines/_broadcast.py +111 -0
- dask_array/routines/_coarsen.py +115 -0
- dask_array/routines/_diff.py +79 -0
- dask_array/routines/_gradient.py +158 -0
- dask_array/routines/_indexing.py +65 -0
- dask_array/routines/_insert_delete.py +132 -0
- dask_array/routines/_misc.py +122 -0
- dask_array/routines/_nonzero.py +72 -0
- dask_array/routines/_search.py +123 -0
- dask_array/routines/_select.py +113 -0
- dask_array/routines/_statistics.py +171 -0
- dask_array/routines/_topk.py +82 -0
- dask_array/routines/_triangular.py +74 -0
- dask_array/routines/_unique.py +232 -0
- dask_array/routines/_where.py +62 -0
- dask_array/slicing/__init__.py +67 -0
- dask_array/slicing/_basic.py +550 -0
- dask_array/slicing/_blocks.py +138 -0
- dask_array/slicing/_bool_index.py +145 -0
- dask_array/slicing/_setitem.py +329 -0
- dask_array/slicing/_squeeze.py +101 -0
- dask_array/slicing/_utils.py +1133 -0
- dask_array/slicing/_vindex.py +282 -0
- dask_array/stacking/__init__.py +15 -0
- dask_array/stacking/_block.py +83 -0
- dask_array/stacking/_simple.py +58 -0
- dask_array/templates/array.html.j2 +48 -0
- dask_array/tests/__init__.py +0 -0
- dask_array/tests/conftest.py +22 -0
- dask_array/tests/test_api.py +40 -0
- dask_array/tests/test_binary_op_chunks.py +107 -0
- dask_array/tests/test_coarse_slice_through_blockwise.py +362 -0
- dask_array/tests/test_collection.py +799 -0
- dask_array/tests/test_creation.py +1102 -0
- dask_array/tests/test_expr_flow.py +143 -0
- dask_array/tests/test_linalg.py +1130 -0
- dask_array/tests/test_map_blocks_multi_output.py +104 -0
- dask_array/tests/test_rechunk_pushdown.py +214 -0
- dask_array/tests/test_reductions.py +1091 -0
- dask_array/tests/test_routines.py +2853 -0
- dask_array/tests/test_shuffle_chunks.py +67 -0
- dask_array/tests/test_slice_pushdown.py +968 -0
- dask_array/tests/test_slice_through_blockwise.py +678 -0
- dask_array/tests/test_slice_through_overlap.py +366 -0
- dask_array/tests/test_slice_through_reshape.py +272 -0
- dask_array/tests/test_slicing.py +839 -0
- dask_array/tests/test_transpose_slice_pushdown.py +208 -0
- dask_array/tests/test_visualize.py +94 -0
- dask_array/tests/test_xarray.py +193 -0
- dask_array-0.1.0.dist-info/METADATA +48 -0
- dask_array-0.1.0.dist-info/RECORD +144 -0
- dask_array-0.1.0.dist-info/WHEEL +4 -0
- dask_array-0.1.0.dist-info/entry_points.txt +2 -0
- dask_array-0.1.0.dist-info/licenses/LICENSE +29 -0
dask_array/fft.py
ADDED
|
@@ -0,0 +1,320 @@
|
|
|
1
|
+
"""FFT operations for array-expr."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import inspect
|
|
6
|
+
import warnings
|
|
7
|
+
from collections.abc import Sequence
|
|
8
|
+
|
|
9
|
+
import numpy as np
|
|
10
|
+
|
|
11
|
+
try:
|
|
12
|
+
import scipy
|
|
13
|
+
import scipy.fftpack
|
|
14
|
+
except ImportError:
|
|
15
|
+
scipy = None
|
|
16
|
+
|
|
17
|
+
from dask_array._collection import asarray, concatenate
|
|
18
|
+
from dask_array._map_blocks import map_blocks
|
|
19
|
+
from dask_array.creation import arange
|
|
20
|
+
from dask_array._numpy_compat import NUMPY_GE_200
|
|
21
|
+
from dask.utils import derived_from, skip_doctest
|
|
22
|
+
|
|
23
|
+
# Error-message template raised by the wrapped FFT functions when a transformed
# axis is split into more than one chunk.  It is %-formatted with a 2-tuple:
# the offending axis and that axis' chunks.
chunk_error = (
    "Dask array only supports taking an FFT along an axis that \n"
    "has a single chunk. An FFT operation was tried on axis %s \n"
    "which has chunks %s. To change the array's chunks use "
    "dask.Array.rechunk."
)
|
|
29
|
+
|
|
30
|
+
fft_preamble = """
|
|
31
|
+
Wrapping of %s
|
|
32
|
+
|
|
33
|
+
The axis along which the FFT is applied must have only one chunk. To change
|
|
34
|
+
the array's chunking use dask.Array.rechunk.
|
|
35
|
+
|
|
36
|
+
The %s docstring follows below:
|
|
37
|
+
|
|
38
|
+
"""
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _fft_out_chunks(a, s, axes):
|
|
42
|
+
"""For computing the output chunks of [i]fft*"""
|
|
43
|
+
if s is None:
|
|
44
|
+
return a.chunks
|
|
45
|
+
chunks = list(a.chunks)
|
|
46
|
+
for i, axis in enumerate(axes):
|
|
47
|
+
chunks[axis] = (s[i],)
|
|
48
|
+
return tuple(chunks)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _rfft_out_chunks(a, s, axes):
|
|
52
|
+
"""For computing the output chunks of rfft*"""
|
|
53
|
+
if s is None:
|
|
54
|
+
s = [a.chunks[axis][0] for axis in axes]
|
|
55
|
+
s = list(s)
|
|
56
|
+
s[-1] = s[-1] // 2 + 1
|
|
57
|
+
chunks = list(a.chunks)
|
|
58
|
+
for i, axis in enumerate(axes):
|
|
59
|
+
chunks[axis] = (s[i],)
|
|
60
|
+
return tuple(chunks)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _irfft_out_chunks(a, s, axes):
|
|
64
|
+
"""For computing the output chunks of irfft*"""
|
|
65
|
+
if s is None:
|
|
66
|
+
s = [a.chunks[axis][0] for axis in axes]
|
|
67
|
+
s[-1] = 2 * (s[-1] - 1)
|
|
68
|
+
chunks = list(a.chunks)
|
|
69
|
+
for i, axis in enumerate(axes):
|
|
70
|
+
chunks[axis] = (s[i],)
|
|
71
|
+
return tuple(chunks)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _hfft_out_chunks(a, s, axes):
|
|
75
|
+
assert len(axes) == 1
|
|
76
|
+
|
|
77
|
+
axis = axes[0]
|
|
78
|
+
|
|
79
|
+
if s is None:
|
|
80
|
+
s = [2 * (a.chunks[axis][0] - 1)]
|
|
81
|
+
|
|
82
|
+
n = s[0]
|
|
83
|
+
|
|
84
|
+
chunks = list(a.chunks)
|
|
85
|
+
chunks[axis] = (n,)
|
|
86
|
+
return tuple(chunks)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _ihfft_out_chunks(a, s, axes):
|
|
90
|
+
assert len(axes) == 1
|
|
91
|
+
|
|
92
|
+
axis = axes[0]
|
|
93
|
+
|
|
94
|
+
if s is None:
|
|
95
|
+
s = [a.chunks[axis][0]]
|
|
96
|
+
else:
|
|
97
|
+
assert len(s) == 1
|
|
98
|
+
|
|
99
|
+
n = s[0]
|
|
100
|
+
|
|
101
|
+
chunks = list(a.chunks)
|
|
102
|
+
if n % 2 == 0:
|
|
103
|
+
m = (n // 2) + 1
|
|
104
|
+
else:
|
|
105
|
+
m = (n + 1) // 2
|
|
106
|
+
chunks[axis] = (m,)
|
|
107
|
+
return tuple(chunks)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
# Maps a base FFT kind to the helper that computes its output chunks.
# ``fft_wrap`` looks a kind up here after stripping trailing "2"/"n"
# characters, so e.g. "fft2" and "fftn" resolve through the "fft" entry.
_out_chunk_fns = {
    "fft": _fft_out_chunks,
    "ifft": _fft_out_chunks,
    "rfft": _rfft_out_chunks,
    "irfft": _irfft_out_chunks,
    "hfft": _hfft_out_chunks,
    "ihfft": _ihfft_out_chunks,
}
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def fft_wrap(fft_func, kind=None, dtype=None, allow_fftpack=False):
    """Wrap 1D, 2D, and ND real and complex FFT functions

    Takes a function that behaves like ``numpy.fft`` functions and
    a specified kind to match it to that are named after the functions
    in the ``numpy.fft`` API.

    Supported kinds include:

        * fft
        * fft2
        * fftn
        * ifft
        * ifft2
        * ifftn
        * rfft
        * rfft2
        * rfftn
        * irfft
        * irfft2
        * irfftn
        * hfft
        * ihfft

    Parameters
    ----------
    fft_func : callable
        The function to apply to every block.
    kind : str, optional
        Name of the ``numpy.fft`` function ``fft_func`` behaves like.
        Defaults to ``fft_func.__name__``.
    dtype : optional
        Output dtype.  When omitted it is inferred by calling ``fft_func``
        on a small array of ones with the input's dtype.
    allow_fftpack : bool, optional
        Suppress the ``FutureWarning`` emitted for ``scipy.fftpack``
        functions.

    Returns
    -------
    callable
        ``func(a, s=None, axes=None, norm=None)`` or, for the 1D kinds
        (those ending in ``"fft"``), ``func(a, n=None, axis=None, norm=None)``.

    Raises
    ------
    ValueError
        If ``kind`` is not one of the supported kinds.  The returned function
        raises ``ValueError`` for duplicate axes or when a transformed axis
        has more than one chunk.

    Examples
    --------
    >>> import dask_array.fft as dff
    >>> parallel_fft = dff.fft_wrap(np.fft.fft)
    >>> parallel_ifft = dff.fft_wrap(np.fft.ifft)
    """
    # ``scipy`` is None when SciPy could not be imported.
    is_fftpack = scipy is not None and fft_func.__module__.startswith(
        "scipy.fftpack"
    )
    if is_fftpack and not allow_fftpack:
        warnings.warn(
            f"Function {fft_func.__name__} from `scipy.fftpack` does not "
            "match NumPy's API and is considered legacy. Please use "
            "`scipy.fft` instead. To suppress this warning and allow usage"
            ", set `allow_fftpack=True`. Support for `scipy.fftpack` will "
            "be deprecated in future releases.",
            FutureWarning,
            stacklevel=2,  # attribute the warning to the caller of fft_wrap
        )

    if kind is None:
        kind = fft_func.__name__
    try:
        out_chunk_fn = _out_chunk_fns[kind.rstrip("2n")]
    except KeyError:
        # The KeyError is an implementation detail; do not chain it.
        raise ValueError(f"Given unknown `kind` {kind}.") from None

    def func(a, s=None, axes=None, norm=None):
        a = asarray(a)
        if axes is None:
            if kind.endswith("2"):
                axes = (-2, -1)
            elif kind.endswith("n"):
                if s is None:
                    axes = tuple(range(a.ndim))
                else:
                    if NUMPY_GE_200:
                        # Match deprecation in numpy
                        warnings.warn(
                            "DeprecationWarning: `axes` should not be `None` "
                            "if `s` is not `None` (Deprecated in NumPy 2.0)",
                            DeprecationWarning,
                            stacklevel=2,  # point at the user's call site
                        )
                    axes = tuple(range(len(s)))
            else:
                axes = (-1,)
        elif len(set(axes)) < len(axes):
            raise ValueError("Duplicate axes not allowed.")

        _dtype = dtype
        if _dtype is None:
            # Infer the output dtype from a tiny sample of the input dtype.
            sample = np.ones(a.ndim * (8,), dtype=a.dtype)
            try:
                _dtype = fft_func(sample, axes=axes, norm=norm).dtype
            except TypeError:
                # e.g. 1D functions, which take ``axis`` rather than ``axes``
                _dtype = fft_func(sample).dtype

        for each_axis in axes:
            if len(a.chunks[each_axis]) != 1:
                raise ValueError(chunk_error % (each_axis, a.chunks[each_axis]))

        chunks = out_chunk_fn(a, s, axes)

        if kind.endswith("fft"):
            # 1D kinds take scalar ``n`` / ``axis``; ``axes`` is always set
            # by this point.
            n = None if s is None else s[0]
            args = (n, axes[0], norm)
        else:
            args = (s, axes, norm)
        if is_fftpack:
            # ``scipy.fftpack`` functions have no ``norm`` parameter; their
            # third positional argument is ``overwrite_x``, so ``norm`` must
            # not be forwarded.
            args = args[:2]

        return map_blocks(fft_func, a, *args, dtype=_dtype, chunks=chunks)

    if kind.endswith("fft"):
        _func = func

        def func(a, n=None, axis=None, norm=None):
            # Adapt the 1D ``n`` / ``axis`` signature to the ``s`` / ``axes``
            # implementation above.
            s = None
            if n is not None:
                s = (n,)

            axes = None
            if axis is not None:
                axes = (axis,)

            return _func(a, s, axes, norm)

    func_mod = inspect.getmodule(fft_func)
    # ``inspect.getmodule`` may return None; fall back to ``__module__``.
    mod_name = func_mod.__name__ if func_mod is not None else fft_func.__module__
    func_name = fft_func.__name__
    func_fullname = f"{mod_name}.{func_name}"
    if fft_func.__doc__ is not None:
        func.__doc__ = fft_preamble % (2 * (func_fullname,))
        func.__doc__ += fft_func.__doc__
        func.__doc__ = skip_doctest(func.__doc__)
    func.__name__ = func_name
    return func
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
# Public FFT functions: each ``numpy.fft`` function wrapped by ``fft_wrap``,
# with the kind taken from the NumPy function's name.
fft = fft_wrap(np.fft.fft)
fft2 = fft_wrap(np.fft.fft2)
fftn = fft_wrap(np.fft.fftn)
ifft = fft_wrap(np.fft.ifft)
ifft2 = fft_wrap(np.fft.ifft2)
ifftn = fft_wrap(np.fft.ifftn)
rfft = fft_wrap(np.fft.rfft)
rfft2 = fft_wrap(np.fft.rfft2)
rfftn = fft_wrap(np.fft.rfftn)
irfft = fft_wrap(np.fft.irfft)
irfft2 = fft_wrap(np.fft.irfft2)
irfftn = fft_wrap(np.fft.irfftn)
hfft = fft_wrap(np.fft.hfft)
ihfft = fft_wrap(np.fft.ihfft)
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
def _fftfreq_block(i, n, d):
|
|
257
|
+
r = i.copy()
|
|
258
|
+
r[i >= (n + 1) // 2] -= n
|
|
259
|
+
r /= n * d
|
|
260
|
+
return r
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
@derived_from(np.fft)
def fftfreq(n, d=1.0, chunks="auto"):
    # Coerce up front so every block receives a plain int / float.
    n, d = int(n), float(d)

    # Each block of indices is converted to frequencies independently.
    sample_indices = arange(n, dtype=float, chunks=chunks)
    return map_blocks(_fftfreq_block, sample_indices, dtype=float, n=n, d=d)
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
@derived_from(np.fft)
def rfftfreq(n, d=1.0, chunks="auto"):
    # Coerce up front so the arithmetic below uses a plain int / float.
    n, d = int(n), float(d)

    # n // 2 + 1 sample indices, scaled by the sample spacing.
    return arange(n // 2 + 1, dtype=float, chunks=chunks) / (n * d)
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
def _fftshift_helper(x, axes=None, inverse=False):
    """Shared implementation of ``fftshift`` and ``ifftshift``.

    For each axis in ``axes`` (all axes when ``None``; a non-sequence value is
    treated as a single axis) the array is split in two and the halves are
    swapped with ``concatenate``.  The split point is ``(n + 1) // 2`` for
    the forward shift and ``n // 2`` for the inverse.  If ``x`` had a single
    chunk along the axis, the result is rechunked back to that chunking.
    """
    if axes is None:
        axes = list(range(x.ndim))
    elif not isinstance(axes, Sequence):
        axes = (axes,)

    # 1 for the forward shift, 0 for the inverse.
    rounding = int(inverse is False)

    shifted = x
    for axis in axes:
        split = (shifted.shape[axis] + rounding) // 2

        head = [slice(None)] * shifted.ndim
        head[axis] = slice(None, split)

        tail = [slice(None)] * shifted.ndim
        tail[axis] = slice(split, None)

        shifted = concatenate([shifted[tuple(tail)], shifted[tuple(head)]], axis=axis)

        if len(x.chunks[axis]) == 1:
            shifted = shifted.rechunk({axis: x.chunks[axis]})

    return shifted
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
@derived_from(np.fft)
def fftshift(x, axes=None):
    # Forward shift: delegate to the shared helper after array coercion.
    return _fftshift_helper(asarray(x), axes=axes, inverse=False)
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
@derived_from(np.fft)
def ifftshift(x, axes=None):
    # Inverse shift: delegate to the shared helper after array coercion.
    return _fftshift_helper(asarray(x), axes=axes, inverse=True)
|
|
dask_array/io/__init__.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""IO functions for array-expr."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dask_array.io._base import IO
|
|
6
|
+
from dask_array.io._from_array import FromArray
|
|
7
|
+
from dask_array.io._from_delayed import FromDelayed, from_delayed
|
|
8
|
+
from dask_array.io._from_graph import FromGraph
|
|
9
|
+
from dask_array.io._from_npy_stack import FromNpyStack, from_npy_stack
|
|
10
|
+
from dask_array.io._store import (
|
|
11
|
+
get_scheduler_lock,
|
|
12
|
+
load_chunk,
|
|
13
|
+
load_store_chunk,
|
|
14
|
+
store,
|
|
15
|
+
to_hdf5,
|
|
16
|
+
)
|
|
17
|
+
from dask_array.io._tiledb import from_tiledb, to_tiledb
|
|
18
|
+
from dask_array.io._to_npy_stack import to_npy_stack
|
|
19
|
+
from dask_array.io._zarr import from_zarr, to_zarr
|
|
20
|
+
|
|
21
|
+
__all__ = [
|
|
22
|
+
"IO",
|
|
23
|
+
"FromArray",
|
|
24
|
+
"FromDelayed",
|
|
25
|
+
"FromGraph",
|
|
26
|
+
"FromNpyStack",
|
|
27
|
+
"from_delayed",
|
|
28
|
+
"from_npy_stack",
|
|
29
|
+
"from_tiledb",
|
|
30
|
+
"from_zarr",
|
|
31
|
+
"get_scheduler_lock",
|
|
32
|
+
"load_chunk",
|
|
33
|
+
"load_store_chunk",
|
|
34
|
+
"store",
|
|
35
|
+
"to_hdf5",
|
|
36
|
+
"to_npy_stack",
|
|
37
|
+
"to_tiledb",
|
|
38
|
+
"to_zarr",
|
|
39
|
+
]
|
dask_array/io/_base.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dask_array._expr import ArrayExpr
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class IO(ArrayExpr):
    """Common base class for IO array expressions (e.g. ``FromArray``)."""

    # Whether rechunk can be pushed into this IO expression by modifying its chunks.
    # False by default since many IO expressions have chunks that affect computation
    # (e.g., Random generates different values with different chunks).
    # Subclasses whose output does not depend on the chunking can override this.
    _can_rechunk_pushdown = False
|
|
dask_array/io/_from_array.py
ADDED
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import functools
|
|
4
|
+
from itertools import product
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
|
|
8
|
+
from dask_array.io._base import IO
|
|
9
|
+
from dask_array._core_utils import (
|
|
10
|
+
getter,
|
|
11
|
+
getter_nofancy,
|
|
12
|
+
graph_from_arraylike,
|
|
13
|
+
normalize_chunks,
|
|
14
|
+
slices_from_chunks,
|
|
15
|
+
)
|
|
16
|
+
from dask_array._utils import meta_from_array
|
|
17
|
+
from dask.utils import SerializableLock
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class FromArray(IO):
    """IO expression exposing an existing array-like object as chunked blocks.

    The operands are listed in ``_parameters``.  ``_region`` is either
    ``None`` or a tuple of slices (one per dimension of ``array``) recording a
    slice that was pushed into this expression; it is applied lazily when the
    graph layer is generated.
    """

    _parameters = [
        "array",
        "_chunks",
        "lock",
        "getitem",
        "inline_array",
        "meta",
        "asarray",
        "fancy",
        "_name_override",
        "_region",  # Slice region for pushdown (tuple of slices or None)
    ]
    _defaults = {
        "_chunks": "auto",
        "getitem": None,
        "inline_array": False,
        "meta": None,
        "asarray": None,
        "fancy": True,
        "lock": False,
        "_name_override": None,
        "_region": None,
    }
    # FromArray reads static data, so rechunk can be pushed in safely
    _can_rechunk_pushdown = True
    # Slicing can be pushed into FromArray by slicing the source array
    _slice_pushdown = True

    @functools.cached_property
    def _name(self):
        """Graph key prefix: ``<prefix>-<deterministic token>``."""
        # _name_override is a prefix, deterministic token is always appended
        prefix = self.operand("_name_override") or "fromarray"
        return f"{prefix}-{self.deterministic_token}"

    @functools.cached_property
    def _effective_shape(self):
        """Shape after applying region slice."""
        region = self.operand("_region")
        if region is None:
            return self.array.shape
        # Compute shape from region slices
        return tuple(
            len(range(*slc.indices(dim_size)))
            for slc, dim_size in zip(region, self.array.shape)
        )

    @functools.cached_property
    def chunks(self):
        """Normalized chunks for the effective (region-sliced) shape."""
        # Normalize chunks lazily - keeps repr compact with user-provided chunks
        # Pass previous_chunks from underlying array (h5py, zarr) for alignment
        previous_chunks = getattr(self.array, "chunks", None)
        # Handle zarr 3.x shards attribute for write alignment
        if (
            hasattr(self.array, "shards")
            and self.array.shards is not None
            and self.operand("_chunks") == "auto"
        ):
            previous_chunks = self.array.shards
        return normalize_chunks(
            self.operand("_chunks"),
            self._effective_shape,
            dtype=self.array.dtype,
            previous_chunks=previous_chunks,
        )

    @functools.cached_property
    def _meta(self):
        """Meta array built from the ``meta`` operand, else from ``array``."""
        if self.operand("meta") is not None:
            return meta_from_array(
                self.operand("meta"),
                ndim=len(self._effective_shape),
                dtype=self.array.dtype,
            )
        return meta_from_array(self.array, dtype=getattr(self.array, "dtype", None))

    @functools.cached_property
    def asarray_arg(self):
        """Resolved ``asarray`` flag handed to the getter.

        When the operand is ``None`` it defaults to ``True`` exactly for
        arrays that do not define ``__array_function__``.
        """
        if self.operand("asarray") is None:
            return not hasattr(self.array, "__array_function__")
        else:
            return self.operand("asarray")

    def _layer(self):
        """Build the task graph (a plain dict) producing every block."""
        lock = self.operand("lock")
        region = self.operand("_region")
        # Note: lock=True is already normalized to SerializableLock() in from_array()

        is_ndarray = type(self.array) in (np.ndarray, np.ma.core.MaskedArray)
        is_single_block = all(len(c) == 1 for c in self.chunks)

        # Get slices for chunks (based on effective shape after region)
        slices = slices_from_chunks(self.chunks)

        # If region is set, offset all slices by the region start
        if region is not None:
            region_starts = tuple(
                slc.indices(dim_size)[0]
                for slc, dim_size in zip(region, self.array.shape)
            )
            slices = [
                tuple(
                    slice(s.start + offset, s.stop + offset, s.step)
                    for s, offset in zip(slc, region_starts)
                )
                for slc in slices
            ]

        # Always use the getter for h5py etc. Not using isinstance(x, np.ndarray)
        # because np.matrix is a subclass of np.ndarray.
        if is_ndarray and not is_single_block and not lock:
            # eagerly slice numpy arrays to prevent memory blowup
            # GH5367, GH5601
            keys = product([self._name], *(range(len(bds)) for bds in self.chunks))
            values = [self.array[slc] for slc in slices]
            dsk = dict(zip(keys, values))
        elif is_ndarray and is_single_block and not lock:
            # Single block - slice with region (or full array) and copy
            if region is not None:
                dsk = {(self._name,) + (0,) * self.array.ndim: self.array[region].copy()}
            else:
                dsk = {(self._name,) + (0,) * self.array.ndim: self.array.copy()}
        else:
            getitem = self.operand("getitem")
            if getitem is None:
                if self.operand("fancy"):
                    getitem = getter
                else:
                    getitem = getter_nofancy

            # For non-numpy arrays with region, we need custom graph generation
            # to apply the offset slices
            if region is not None:
                keys = list(product([self._name], *(range(len(bds)) for bds in self.chunks)))
                if self.inline_array:
                    dsk = {
                        k: (getitem, self.array, slc, self.asarray_arg, lock)
                        for k, slc in zip(keys, slices)
                    }
                else:
                    # Put array in graph once, reference by key
                    arr_key = ("array-" + self._name,)
                    dsk = {arr_key: self.array}
                    dsk.update(
                        {
                            k: (getitem, arr_key, slc, self.asarray_arg, lock)
                            for k, slc in zip(keys, slices)
                        }
                    )
            else:
                dsk = graph_from_arraylike(
                    self.array,
                    chunks=self.chunks,
                    shape=self.array.shape,
                    name=self._name,
                    lock=lock,
                    getitem=getitem,
                    asarray=self.asarray_arg,
                    inline_array=self.inline_array,
                    dtype=self.array.dtype,
                )
        return dict(dsk)  # this comes as a legacy HLG for now

    def __str__(self):
        return "FromArray(...)"

    def __dask_tokenize__(self):
        """Tokenize all operands, replacing unusual locks by their ``id()``."""
        from dask.tokenize import _tokenize_deterministic

        # Handle non-serializable locks by using their id()
        # Locks are identity-based objects, so using id() is semantically correct
        lock = self.operand("lock")
        if lock and not isinstance(lock, (bool, SerializableLock)):
            lock_token = ("lock-id", id(lock))
        else:
            lock_token = lock

        operands = [lock_token if p == "lock" else self.operand(p) for p in self._parameters]
        return _tokenize_deterministic(type(self), *operands)

    def _simplify_up(self, parent, dependents):
        """Allow slice operations to push into FromArray."""
        from dask_array.slicing import SliceSlicesIntegers

        if isinstance(parent, SliceSlicesIntegers):
            return self._accept_slice(parent)
        return None

    def _accept_slice(self, slice_expr):
        """Accept a slice by setting region (deferred slice).

        Pushes the slice into the FromArray expression by recording it as a region,
        which is then applied during layer generation.

        Returns ``None`` when the index cannot be pushed down (it contains
        ``None``, a non slice/integer entry, or a slice with a non-unit step).
        Otherwise returns the new ``FromArray``, wrapped in a
        ``SliceSlicesIntegers`` that drops the size-1 dimensions when the
        index contained integers.
        """
        from numbers import Integral

        from dask_array.slicing._basic import (
            SliceSlicesIntegers,
            _compose_slices,
            _compute_sliced_chunks,
        )

        index = slice_expr.index

        # Only handle slices and integers (no None/newaxis, no fancy indexing)
        if any(idx is None for idx in index):
            return None
        if any(not isinstance(idx, (slice, Integral)) for idx in index):
            return None
        # Don't push non-unit step slices - _layer doesn't handle them correctly
        if any(isinstance(idx, slice) and idx.step is not None and idx.step != 1 for idx in index):
            return None

        source = self.array
        old_chunks = self.chunks  # Use normalized chunks property
        old_region = self.operand("_region")
        current_shape = self._effective_shape

        # Pad index to full dimensions
        full_index = index + (slice(None),) * (source.ndim - len(index))

        # Check if any integers are present - they need special handling
        has_integers = any(isinstance(idx, Integral) for idx in full_index)

        # Convert integers to 1-element slices for region calculation.
        # Negative integers are wrapped against the current (region-relative)
        # dimension size first: slice(-1, 0) would otherwise select nothing.
        region_index = []
        for axis, idx in enumerate(full_index):
            if isinstance(idx, Integral):
                start = idx + current_shape[axis] if idx < 0 else idx
                idx = slice(start, start + 1)
            region_index.append(idx)
        region_index = tuple(region_index)

        # Compute new region by combining with existing region
        if old_region is not None:
            # Compose slices: new slice is relative to old region
            new_region = tuple(
                _compose_slices(old_slc, new_slc, dim_size)
                for old_slc, new_slc, dim_size in zip(old_region, region_index, source.shape)
            )
        else:
            new_region = region_index

        # Compute new chunks - use same chunk sizes but clipped to new shape
        new_chunks = tuple(
            _compute_sliced_chunks(dim_chunks, slc, dim_size)
            for dim_chunks, slc, dim_size in zip(old_chunks, region_index, current_shape)
        )

        # Create new FromArray with region (deferred slice)
        new_io = FromArray(
            source,  # Keep original source, don't slice
            new_chunks,
            lock=self.operand("lock"),
            getitem=self.operand("getitem"),
            inline_array=self.inline_array,
            meta=self.operand("meta"),
            asarray=self.operand("asarray"),
            fancy=self.operand("fancy"),
            _name_override=self.operand("_name_override"),
            _region=new_region,
        )

        # If integers were present, apply them to extract elements
        if has_integers:
            # Build index with 0s for integer dims (they're now size-1)
            extract_index = tuple(0 if isinstance(idx, Integral) else slice(None) for idx in full_index)
            return SliceSlicesIntegers(new_io, extract_index, False)

        return new_io
|