dask-array 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dask_array/__init__.py +228 -0
- dask_array/_backends.py +76 -0
- dask_array/_backends_array.py +99 -0
- dask_array/_blockwise.py +1410 -0
- dask_array/_broadcast.py +272 -0
- dask_array/_chunk.py +445 -0
- dask_array/_chunk_types.py +54 -0
- dask_array/_collection.py +1644 -0
- dask_array/_concatenate.py +331 -0
- dask_array/_core_utils.py +1365 -0
- dask_array/_dispatch.py +141 -0
- dask_array/_einsum.py +277 -0
- dask_array/_expr.py +544 -0
- dask_array/_expr_flow.py +586 -0
- dask_array/_gufunc.py +805 -0
- dask_array/_histogram.py +617 -0
- dask_array/_map_blocks.py +652 -0
- dask_array/_new_collection.py +10 -0
- dask_array/_numpy_compat.py +135 -0
- dask_array/_overlap.py +1159 -0
- dask_array/_rechunk.py +1050 -0
- dask_array/_reshape.py +710 -0
- dask_array/_routines.py +102 -0
- dask_array/_shuffle.py +448 -0
- dask_array/_stack.py +264 -0
- dask_array/_svg.py +291 -0
- dask_array/_templates.py +29 -0
- dask_array/_test_utils.py +257 -0
- dask_array/_ufunc.py +385 -0
- dask_array/_utils.py +349 -0
- dask_array/_visualize.py +223 -0
- dask_array/_xarray.py +337 -0
- dask_array/core/__init__.py +34 -0
- dask_array/core/_blockwise_funcs.py +312 -0
- dask_array/core/_conversion.py +422 -0
- dask_array/core/_from_graph.py +97 -0
- dask_array/creation/__init__.py +71 -0
- dask_array/creation/_arange.py +121 -0
- dask_array/creation/_diag.py +116 -0
- dask_array/creation/_diagonal.py +241 -0
- dask_array/creation/_eye.py +103 -0
- dask_array/creation/_linspace.py +102 -0
- dask_array/creation/_mesh.py +134 -0
- dask_array/creation/_ones_zeros.py +454 -0
- dask_array/creation/_pad.py +270 -0
- dask_array/creation/_repeat.py +55 -0
- dask_array/creation/_tile.py +36 -0
- dask_array/creation/_tri.py +28 -0
- dask_array/creation/_utils.py +296 -0
- dask_array/fft.py +320 -0
- dask_array/io/__init__.py +39 -0
- dask_array/io/_base.py +10 -0
- dask_array/io/_from_array.py +257 -0
- dask_array/io/_from_delayed.py +95 -0
- dask_array/io/_from_graph.py +54 -0
- dask_array/io/_from_npy_stack.py +67 -0
- dask_array/io/_store.py +336 -0
- dask_array/io/_tiledb.py +159 -0
- dask_array/io/_to_npy_stack.py +65 -0
- dask_array/io/_zarr.py +449 -0
- dask_array/linalg/__init__.py +39 -0
- dask_array/linalg/_cholesky.py +234 -0
- dask_array/linalg/_lu.py +300 -0
- dask_array/linalg/_norm.py +94 -0
- dask_array/linalg/_qr.py +601 -0
- dask_array/linalg/_solve.py +349 -0
- dask_array/linalg/_svd.py +394 -0
- dask_array/linalg/_tensordot.py +334 -0
- dask_array/linalg/_utils.py +74 -0
- dask_array/manipulation/__init__.py +45 -0
- dask_array/manipulation/_expand.py +321 -0
- dask_array/manipulation/_flip.py +92 -0
- dask_array/manipulation/_roll.py +78 -0
- dask_array/manipulation/_transpose.py +309 -0
- dask_array/random/__init__.py +125 -0
- dask_array/random/_choice.py +181 -0
- dask_array/random/_expr.py +256 -0
- dask_array/random/_generator.py +441 -0
- dask_array/random/_random_state.py +259 -0
- dask_array/random/_utils.py +84 -0
- dask_array/reductions/__init__.py +84 -0
- dask_array/reductions/_arg_reduction.py +130 -0
- dask_array/reductions/_common.py +1082 -0
- dask_array/reductions/_cumulative.py +522 -0
- dask_array/reductions/_percentile.py +261 -0
- dask_array/reductions/_reduction.py +725 -0
- dask_array/reductions/_trace.py +56 -0
- dask_array/routines/__init__.py +133 -0
- dask_array/routines/_apply.py +84 -0
- dask_array/routines/_bincount.py +112 -0
- dask_array/routines/_broadcast.py +111 -0
- dask_array/routines/_coarsen.py +115 -0
- dask_array/routines/_diff.py +79 -0
- dask_array/routines/_gradient.py +158 -0
- dask_array/routines/_indexing.py +65 -0
- dask_array/routines/_insert_delete.py +132 -0
- dask_array/routines/_misc.py +122 -0
- dask_array/routines/_nonzero.py +72 -0
- dask_array/routines/_search.py +123 -0
- dask_array/routines/_select.py +113 -0
- dask_array/routines/_statistics.py +171 -0
- dask_array/routines/_topk.py +82 -0
- dask_array/routines/_triangular.py +74 -0
- dask_array/routines/_unique.py +232 -0
- dask_array/routines/_where.py +62 -0
- dask_array/slicing/__init__.py +67 -0
- dask_array/slicing/_basic.py +550 -0
- dask_array/slicing/_blocks.py +138 -0
- dask_array/slicing/_bool_index.py +145 -0
- dask_array/slicing/_setitem.py +329 -0
- dask_array/slicing/_squeeze.py +101 -0
- dask_array/slicing/_utils.py +1133 -0
- dask_array/slicing/_vindex.py +282 -0
- dask_array/stacking/__init__.py +15 -0
- dask_array/stacking/_block.py +83 -0
- dask_array/stacking/_simple.py +58 -0
- dask_array/templates/array.html.j2 +48 -0
- dask_array/tests/__init__.py +0 -0
- dask_array/tests/conftest.py +22 -0
- dask_array/tests/test_api.py +40 -0
- dask_array/tests/test_binary_op_chunks.py +107 -0
- dask_array/tests/test_coarse_slice_through_blockwise.py +362 -0
- dask_array/tests/test_collection.py +799 -0
- dask_array/tests/test_creation.py +1102 -0
- dask_array/tests/test_expr_flow.py +143 -0
- dask_array/tests/test_linalg.py +1130 -0
- dask_array/tests/test_map_blocks_multi_output.py +104 -0
- dask_array/tests/test_rechunk_pushdown.py +214 -0
- dask_array/tests/test_reductions.py +1091 -0
- dask_array/tests/test_routines.py +2853 -0
- dask_array/tests/test_shuffle_chunks.py +67 -0
- dask_array/tests/test_slice_pushdown.py +968 -0
- dask_array/tests/test_slice_through_blockwise.py +678 -0
- dask_array/tests/test_slice_through_overlap.py +366 -0
- dask_array/tests/test_slice_through_reshape.py +272 -0
- dask_array/tests/test_slicing.py +839 -0
- dask_array/tests/test_transpose_slice_pushdown.py +208 -0
- dask_array/tests/test_visualize.py +94 -0
- dask_array/tests/test_xarray.py +193 -0
- dask_array-0.1.0.dist-info/METADATA +48 -0
- dask_array-0.1.0.dist-info/RECORD +144 -0
- dask_array-0.1.0.dist-info/WHEEL +4 -0
- dask_array-0.1.0.dist-info/entry_points.txt +2 -0
- dask_array-0.1.0.dist-info/licenses/LICENSE +29 -0
|
@@ -0,0 +1,422 @@
|
|
|
1
|
+
"""Array conversion functions: from_array, asarray, asanyarray, array."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import uuid
|
|
6
|
+
import warnings
|
|
7
|
+
from collections.abc import Iterable
|
|
8
|
+
|
|
9
|
+
import numpy as np
|
|
10
|
+
|
|
11
|
+
from dask_array._core_utils import getter_inline
|
|
12
|
+
from dask_array._utils import meta_from_array
|
|
13
|
+
from dask.base import is_dask_collection
|
|
14
|
+
from dask.utils import SerializableLock
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _as_dtype(a, dtype):
|
|
18
|
+
"""Apply dtype conversion if needed."""
|
|
19
|
+
if dtype is None:
|
|
20
|
+
return a
|
|
21
|
+
else:
|
|
22
|
+
return a.astype(dtype)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def from_array(
    x,
    chunks="auto",
    lock=False,
    asarray=None,
    fancy=True,
    getitem=None,
    meta=None,
    inline_array=False,
    name=None,
):
    """Create dask array from something that looks like an array.

    Input must have a ``.shape``, ``.ndim``, ``.dtype`` and support numpy-style slicing.

    Parameters
    ----------
    x : array_like
    chunks : int, tuple
        How to chunk the array. Must be one of the following forms:

        - A blocksize like 1000.
        - A blockshape like (1000, 1000).
        - Explicit sizes of all blocks along all dimensions like
          ((1000, 1000, 500), (400, 400)).
        - A size in bytes, like "100 MiB" which will choose a uniform
          block-like shape
        - The word "auto" which acts like the above, but uses a configuration
          value ``array.chunk-size`` for the chunk size

        -1 or None as a blocksize indicate the size of the corresponding
        dimension.
    name : str or bool, optional
        The key name to use for the array. Defaults to a hash of ``x``.

        Hashing is useful if the same value of ``x`` is used to create multiple
        arrays, as Dask can then recognise that they're the same and
        avoid duplicate computations. However, it can also be slow, and if the
        array is not contiguous it is copied for hashing. If the array uses
        stride tricks (such as :func:`numpy.broadcast_to` or
        :func:`skimage.util.view_as_windows`) to have a larger logical
        than physical size, this copy can cause excessive memory usage.

        If you don't need the deduplication provided by hashing, use
        ``name=False`` to generate a random name instead of hashing, which
        avoids the pitfalls described above. Using ``name=True`` is
        equivalent to the default.

        By default, hashing uses python's standard sha1. This behaviour can be
        changed by installing cityhash, xxhash or murmurhash. If installed,
        a large-factor speedup can be obtained in the tokenisation step.

        .. note::

           Because this ``name`` is used as the key in task graphs, you should
           ensure that it uniquely identifies the data contained within. If
           you'd like to provide a descriptive name that is still unique, combine
           the descriptive name with :func:`dask.base.tokenize` of the
           ``array_like``. See :ref:`graphs` for more.

    lock : bool or Lock, optional
        If ``x`` doesn't support concurrent reads then provide a lock here, or
        pass in True to have dask.array create one for you.
    asarray : bool, optional
        If True then call np.asarray on chunks to convert them to numpy arrays.
        If False then chunks are passed through unchanged.
        If None (default) then we use True if the ``__array_function__`` method
        is undefined.

        .. note::

            Dask does not preserve the memory layout of the original array when
            the array is created using Fortran rather than C ordering.

    fancy : bool, optional
        If ``x`` doesn't support fancy indexing (e.g. indexing with lists or
        arrays) then set to False. Default is True.
    meta : Array-like, optional
        The metadata for the resulting dask array.  This is the kind of array
        that will result from slicing the input array.
        Defaults to the input array.
    inline_array : bool, default False
        How to include the array in the task graph. By default
        (``inline_array=False``) the array is included in a task by itself,
        and each chunk refers to that task by its key.

        With ``inline_array=True``, Dask will instead inline the array directly
        in the values of the task graph.

        The right choice for ``inline_array`` depends on several factors,
        including the size of ``x``, how expensive it is to create, which
        scheduler you're using, and the pattern of downstream computations.
        As a heuristic, ``inline_array=True`` may be the right choice when
        the array ``x`` is cheap to serialize and deserialize (since it's
        done for every task), and if you expect the scheduler to need to
        move around the inputs relative to the workers. For example, HDF5
        files would be a bad fit for ``inline_array=True``, while small
        NumPy arrays would be a good fit.
    getitem : callable, optional
        Callable with signature (a, index) -> value to use for indexing the
        array. Defaults to :func:`operator.getitem`.

    Returns
    -------
    Array
        The collection built by ``new_collection`` around a ``FromArray``
        expression. If ``x`` is a chunked ``xarray.DataArray`` whose ``.data``
        is already a dask_array ``Array``, that backing array is returned
        unchanged.

    Raises
    ------
    TypeError
        If ``x`` (or the data of a chunked ``xarray.DataArray``) is a legacy
        ``dask.array.Array``.
    ValueError
        If ``x`` is already a dask_array ``Array``.

    Examples
    --------
    >>> x = h5py.File('...')['/data/path']  # doctest: +SKIP
    >>> a = da.from_array(x, chunks=(1000, 1000))  # doctest: +SKIP

    If your underlying datastore does not support concurrent reads then include
    the ``lock=True`` keyword argument or ``lock=mylock`` if you want multiple
    arrays to coordinate around the same lock.

    >>> a = da.from_array(x, chunks=(1000, 1000), lock=True)  # doctest: +SKIP

    If your underlying datastore has a ``.chunks`` attribute (as h5py and zarr
    datasets do) then a multiple of that chunk shape will be used if you
    do not provide a chunk shape.

    >>> a = da.from_array(x, chunks='auto')  # doctest: +SKIP
    >>> a = da.from_array(x, chunks='100 MiB')  # doctest: +SKIP
    >>> a = da.from_array(x)  # doctest: +SKIP

    If providing a name, ensure that it is unique

    >>> import dask.base
    >>> token = dask.base.tokenize(x)  # doctest: +SKIP
    >>> a = da.from_array(x, name='myarray-' + token)  # doctest: +SKIP

    NumPy ndarrays are chunked into a single chunk

    >>> a = da.from_array(np.array([[1, 2], [3, 4]]))  # doctest: +SKIP
    >>> a.chunks  # doctest: +SKIP
    ((2,), (2,))
    """
    # Lazy imports to avoid circular dependencies
    from dask_array._new_collection import new_collection

    # Import Array for isinstance check
    from dask_array._collection import Array
    from dask_array.io import FromArray
    from dask.utils import is_arraylike

    # Legacy dask.array collections are detected by module/class name so that
    # the legacy array class never has to be imported here.
    if type(x).__module__.startswith("dask.array") and type(x).__name__ == "Array":
        raise TypeError("dask_array does not accept dask.array.Array inputs")
    if isinstance(x, Array):
        raise ValueError("Array is already a dask array. Use 'asarray' or 'rechunk' instead.")

    # Handle xarray DataArray wrapping a dask array
    try:
        import xarray as xr

        if isinstance(x, xr.DataArray) and x.chunks is not None:
            if type(x.data).__module__.startswith("dask.array") and type(x.data).__name__ == "Array":
                raise TypeError("dask_array does not accept dask.array.Array inputs")
            if isinstance(x.data, Array):
                return x.data
    except ImportError:
        # xarray is optional; without it there is nothing to unwrap.
        pass

    if is_dask_collection(x):
        warnings.warn(
            "Passing an object to dask.array.from_array which is already a "
            "Dask collection. This can lead to unexpected behavior.",
            # Attribute the warning to the caller rather than to this module.
            stacklevel=2,
        )

    if isinstance(x, (list, tuple, memoryview) + np.ScalarType):
        x = np.array(x)

    # Work on a private copy of the input when the object supports copying.
    if is_arraylike(x) and hasattr(x, "copy"):
        x = x.copy()

    # Validate chunks early to catch errors, but store original for compact repr
    from dask_array._core_utils import normalize_chunks

    normalize_chunks(chunks, x.shape, dtype=x.dtype)  # validates

    # Determine name prefix for the expression
    # User-provided name is used as prefix, deterministic token always appended
    if name in (None, True):
        # Deterministic: use "array" prefix, token computed from operands
        name_prefix = "array"
    elif name is False:
        # Non-deterministic: include UUID in prefix to ensure uniqueness
        name_prefix = f"array-{uuid.uuid1()}"
    else:
        # Custom: use user-provided name as prefix
        name_prefix = name

    # Normalize lock=True to SerializableLock() for actual use
    if lock is True:
        lock = SerializableLock()

    # Pass original chunks - normalization happens lazily in FromArray.chunks
    return new_collection(
        FromArray(
            x,
            chunks,
            lock=lock,
            asarray=asarray,
            fancy=fancy,
            getitem=getitem,
            meta=meta,
            inline_array=inline_array,
            _name_override=name_prefix,
        )
    )
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def asarray(a, allow_unknown_chunksizes=False, dtype=None, order=None, *, like=None, **kwargs):
    """Convert the input to a dask array.

    Parameters
    ----------
    a : array-like
        Input data, in any form that can be converted to a dask array. This
        includes lists, lists of tuples, tuples, tuples of tuples, tuples of
        lists and ndarrays.
    allow_unknown_chunksizes: bool
        Allow unknown chunksizes, such as come from converting from dask
        dataframes.  Dask.array is unable to verify that chunks line up.  If
        data comes from differently aligned sources then this can cause
        unexpected results. Only forwarded to ``stack`` when ``a`` is a list
        or tuple containing dask arrays.
    dtype : data-type, optional
        By default, the data-type is inferred from the input data.
    order : {'C', 'F', 'A', 'K'}, optional
        Memory layout. 'A' and 'K' depend on the order of input array a.
        'C' row-major (C-style), 'F' column-major (Fortran-style) memory
        representation. 'A' (any) means 'F' if a is Fortran contiguous, 'C'
        otherwise; 'K' (keep) preserves input order. Defaults to 'C'.
    like: array-like
        Reference object to allow the creation of Dask arrays with chunks
        that are not NumPy arrays. If an array-like passed in as ``like``
        supports the ``__array_function__`` protocol, the chunk type of the
        resulting array will be defined by it. In this case, it ensures the
        creation of a Dask array compatible with that passed in via this
        argument. If ``like`` is a Dask array, the chunk type of the
        resulting array will be defined by the chunk type of ``like``.
        Requires NumPy 1.20.0 or higher.
    **kwargs
        Forwarded to ``from_array`` when ``a`` has to be wrapped.

    Returns
    -------
    out : dask array
        Dask array interpretation of a.

    Examples
    --------
    >>> import dask_array as da
    >>> import numpy as np
    >>> x = np.arange(3)
    >>> da.asarray(x)
    dask.array<array, shape=(3,), dtype=int64, chunksize=(3,), chunktype=numpy.ndarray>

    >>> y = [[1, 2, 3], [4, 5, 6]]
    >>> da.asarray(y)
    dask.array<array, shape=(2, 3), dtype=int64, chunksize=(2, 3), chunktype=numpy.ndarray>

    .. warning::
        `order` is ignored if `a` is an `Array`, has the attribute ``to_dask_array``,
        or is a list or tuple of `Array`'s.
    """
    # Lazy imports to avoid circular dependencies
    from dask_array._collection import Array

    if like is not None:
        from functools import partial

        from dask_array._utils import asarray_safe

        reference_meta = meta_from_array(like)
        if isinstance(a, Array):
            # dtype/order are bound into the per-block function with partial;
            # map_blocks' own dtype parameter describes output metadata and
            # is not passed on to the function.
            convert_block = partial(asarray_safe, like=reference_meta, dtype=dtype, order=order)
            return a.map_blocks(convert_block)
        a = asarray_safe(a, like=reference_meta, dtype=dtype, order=order)
    elif isinstance(a, Array):
        return _as_dtype(a, dtype)
    elif hasattr(a, "to_dask_array"):
        return _as_dtype(a.to_dask_array(), dtype)
    elif type(a).__module__.split(".")[0] == "xarray" and hasattr(a, "data"):
        # Unwrap xarray containers and convert whatever they hold.
        return _as_dtype(asarray(a.data, order=order), dtype)
    elif isinstance(a, (list, tuple)) and any(isinstance(i, Array) for i in a):
        # Lazy import to avoid circular dependency
        from dask_array.stacking import stack

        stacked = stack(a, allow_unknown_chunksizes=allow_unknown_chunksizes)
        return _as_dtype(stacked, dtype)
    elif not isinstance(getattr(a, "shape", None), Iterable):
        # No usable ``shape``: let NumPy build a concrete array first.
        a = np.asarray(a, dtype=dtype, order=order)

    wrapped = from_array(a, getitem=getter_inline, **kwargs)
    return _as_dtype(wrapped, dtype)
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
def asanyarray(a, dtype=None, order=None, *, like=None, inline_array=False):
    """Convert the input to a dask array.

    Subclasses of ``np.ndarray`` will be passed through as chunks unchanged.

    Parameters
    ----------
    a : array-like
        Input data, in any form that can be converted to a dask array. This
        includes lists, lists of tuples, tuples, tuples of tuples, tuples of
        lists and ndarrays.
    dtype : data-type, optional
        By default, the data-type is inferred from the input data.
    order : {'C', 'F', 'A', 'K'}, optional
        Memory layout. 'A' and 'K' depend on the order of input array a.
        'C' row-major (C-style), 'F' column-major (Fortran-style) memory
        representation. 'A' (any) means 'F' if a is Fortran contiguous, 'C'
        otherwise; 'K' (keep) preserves input order. Defaults to 'C'.
    like: array-like
        Reference object to allow the creation of Dask arrays with chunks
        that are not NumPy arrays. If an array-like passed in as ``like``
        supports the ``__array_function__`` protocol, the chunk type of the
        resulting array will be defined by it. In this case, it ensures the
        creation of a Dask array compatible with that passed in via this
        argument. If ``like`` is a Dask array, the chunk type of the
        resulting array will be defined by the chunk type of ``like``.
        Requires NumPy 1.20.0 or higher.
    inline_array:
        Whether to inline the array in the resulting dask graph. For more information,
        see the documentation for ``dask.array.from_array()``.

    Returns
    -------
    out : dask array
        Dask array interpretation of a.

    Examples
    --------
    >>> import dask_array as da
    >>> import numpy as np
    >>> x = np.arange(3)
    >>> da.asanyarray(x)
    dask.array<array, shape=(3,), dtype=int64, chunksize=(3,), chunktype=numpy.ndarray>

    >>> y = [[1, 2, 3], [4, 5, 6]]
    >>> da.asanyarray(y)
    dask.array<array, shape=(2, 3), dtype=int64, chunksize=(2, 3), chunktype=numpy.ndarray>

    .. warning::
        `order` is ignored if `a` is an `Array`, has the attribute ``to_dask_array``,
        or is a list or tuple of `Array`'s.
    """
    # Lazy imports to avoid circular dependencies
    from dask_array._collection import Array

    if like is None:
        if isinstance(a, Array):
            return _as_dtype(a, dtype)
        elif hasattr(a, "to_dask_array"):
            return _as_dtype(a.to_dask_array(), dtype)
        elif type(a).__module__.split(".")[0] == "xarray" and hasattr(a, "data"):
            # Recurse through asanyarray (not asarray) so the wrapped data gets
            # the same single-chunk, asarray=False handling as any other input,
            # and the caller's inline_array choice is honoured.
            unwrapped = asanyarray(a.data, order=order, inline_array=inline_array)
            return _as_dtype(unwrapped, dtype)
        elif isinstance(a, (list, tuple)) and any(isinstance(i, Array) for i in a):
            # Lazy import to avoid circular dependency
            from dask_array.stacking import stack

            return _as_dtype(stack(a), dtype)
        elif not isinstance(getattr(a, "shape", None), Iterable):
            # No usable ``shape``: let NumPy build a concrete array first.
            a = np.asanyarray(a, dtype=dtype, order=order)
    else:
        from functools import partial

        from dask_array._utils import asanyarray_safe

        like_meta = meta_from_array(like)
        if isinstance(a, Array):
            # Use partial to pass dtype to asanyarray_safe, not to map_blocks
            # (map_blocks' dtype parameter controls output metadata, not the function call)
            return a.map_blocks(partial(asanyarray_safe, like=like_meta, dtype=dtype, order=order))
        else:
            a = asanyarray_safe(a, like=like_meta, dtype=dtype, order=order)

    # Wrap as a single chunk and leave chunk objects unconverted
    # (asarray=False) so ndarray subclasses survive.
    a = from_array(
        a,
        chunks=a.shape,
        getitem=getter_inline,
        asarray=False,
        inline_array=inline_array,
    )
    return _as_dtype(a, dtype)
|
|
408
|
+
|
|
409
|
+
|
|
410
|
+
def array(x, dtype=None, ndmin=None, *, like=None):
    """Create a dask array from an array-like object.

    Parameters
    ----------
    x : array-like
        Input passed to ``asarray``.
    dtype : data-type, optional
        If given and different from the converted array's dtype, the result
        is cast with ``astype``.
    ndmin : int, optional
        Minimum number of dimensions. Length-one axes are prepended until the
        result has at least this many dimensions.
    like : array-like, optional
        Forwarded to ``asarray``.

    Returns
    -------
    out : dask array

    See Also
    --------
    numpy.array
    """
    x = asarray(x, like=like)
    while ndmin is not None and x.ndim < ndmin:
        # Use Ellipsis rather than ":" so that 0-d inputs can be promoted too;
        # ``x[None, :]`` needs at least one existing axis to slice.
        x = x[None, ...]
    if dtype is not None and x.dtype != dtype:
        x = x.astype(dtype)
    return x
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
"""Create array from existing task graph."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dask._task_spec import Alias
|
|
6
|
+
from dask._task_spec import GraphNode
|
|
7
|
+
from dask._task_spec import TaskRef
|
|
8
|
+
from dask_array._new_collection import new_collection
|
|
9
|
+
from dask_array.io import FromGraph
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _dependency_keys_in_layer(layer, name):
    """Collect the tuple keys referenced in ``layer`` whose first item is ``name``.

    Every value of ``layer`` is walked with an explicit work-list:

    - ``TaskRef`` objects are replaced by their ``.key``;
    - ``GraphNode`` objects contribute their ``.dependencies``;
    - tuples starting with ``name`` are recorded, tuples starting with a
      callable contribute their remaining items, other tuples are ignored;
    - lists and sets contribute their items, dicts their values.

    Parameters
    ----------
    layer : mapping
        Graph layer whose values are inspected.
    name : object
        Value compared (with ``==``) against the first element of each tuple.

    Returns
    -------
    set of tuple
        The matching keys.
    """
    found = set()
    # One shared work-list; the result is a set, so visiting order is irrelevant.
    pending = list(layer.values())
    while pending:
        node = pending.pop()
        if isinstance(node, TaskRef):
            node = node.key
        if isinstance(node, GraphNode):
            pending.extend(node.dependencies)
        elif isinstance(node, tuple):
            if not node:
                continue
            if node[0] == name:
                found.add(node)
            elif callable(node[0]):
                pending.extend(node[1:])
        elif isinstance(node, (list, set)):
            pending.extend(node)
        elif isinstance(node, dict):
            pending.extend(node.values())
    return found
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def from_graph(layer, _meta, chunks, keys, name_prefix, dependencies=()):
    """Create a dask array from an existing task graph.

    This is primarily used internally for reconstructing arrays after
    persistence or when recreating arrays from lowered expressions.

    Each dependency is lowered with ``lower_completely`` when it offers that
    method. If lowering changes the expression's ``_name``, keys in ``layer``
    that still point at the old name (with one integer index per entry of
    ``expr.numblocks``) are mapped to the lowered name through ``Alias``
    entries added to a copy of the layer.

    Parameters
    ----------
    layer : dict or HighLevelGraph
        The task graph layer containing the array data
    _meta : array-like
        Metadata array describing the dtype and type of chunks
    chunks : tuple of tuples
        Chunk sizes for each dimension
    keys : list
        Flattened list of task keys
    name_prefix : str
        Prefix for generating the array name
    dependencies : sequence, optional
        Dask-array collections or expressions that provide keys referenced by
        ``layer``.

    Returns
    -------
    Array
        A new dask Array wrapping the provided graph
    """
    lowered_deps = []
    key_aliases = {}
    layer_copy = None  # shallow copy of ``layer``, made only when first needed

    for dependency in dependencies:
        # Collections expose ``.expr``; bare expressions are used directly.
        expr = getattr(dependency, "expr", dependency)
        if hasattr(expr, "lower_completely"):
            lowered = expr.lower_completely()
        else:
            lowered = expr
        lowered_deps.append(lowered)

        # Same name before and after lowering: no key needs redirecting.
        if getattr(lowered, "_name", None) == getattr(expr, "_name", None):
            continue

        if layer_copy is None:
            layer_copy = dict(layer)

        for old_key in _dependency_keys_in_layer(layer_copy, expr._name):
            # Only genuine block keys qualify: (name, i0, i1, ...) with one
            # integer index per dimension of the dependency.
            has_block_shape = len(old_key) == len(expr.numblocks) + 1
            if has_block_shape and all(isinstance(i, int) for i in old_key[1:]):
                key_aliases[old_key] = Alias(old_key, (lowered._name, *old_key[1:]))

    if key_aliases:
        layer_copy.update(key_aliases)
        layer = layer_copy

    graph_expr = FromGraph(
        layer=layer,
        _meta=_meta,
        chunks=chunks,
        keys=keys,
        name_prefix=name_prefix,
        _dependencies=tuple(lowered_deps),
    )
    return new_collection(graph_expr)
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"""Array creation functions for array-expr."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from ._arange import Arange, arange
|
|
6
|
+
from ._diag import Diag1D, Diag2DSimple, diag
|
|
7
|
+
from ._diagonal import Diagonal, diagonal
|
|
8
|
+
from ._eye import Eye, eye
|
|
9
|
+
from ._linspace import Linspace, linspace
|
|
10
|
+
from ._mesh import fromfunction, indices, meshgrid
|
|
11
|
+
from ._ones_zeros import (
|
|
12
|
+
BroadcastTrick,
|
|
13
|
+
Empty,
|
|
14
|
+
Full,
|
|
15
|
+
Ones,
|
|
16
|
+
Zeros,
|
|
17
|
+
empty,
|
|
18
|
+
empty_like,
|
|
19
|
+
full,
|
|
20
|
+
full_like,
|
|
21
|
+
ones,
|
|
22
|
+
ones_like,
|
|
23
|
+
wrap,
|
|
24
|
+
wrap_func_shape_as_first_arg,
|
|
25
|
+
zeros,
|
|
26
|
+
zeros_like,
|
|
27
|
+
)
|
|
28
|
+
from ._pad import pad
|
|
29
|
+
from ._repeat import repeat
|
|
30
|
+
from ._tile import tile
|
|
31
|
+
from ._tri import tri
|
|
32
|
+
from ._utils import to_backend
|
|
33
|
+
|
|
34
|
+
__all__ = [
|
|
35
|
+
# Classes
|
|
36
|
+
"Arange",
|
|
37
|
+
"BroadcastTrick",
|
|
38
|
+
"Diag1D",
|
|
39
|
+
"Diag2DSimple",
|
|
40
|
+
"Diagonal",
|
|
41
|
+
"Empty",
|
|
42
|
+
"Eye",
|
|
43
|
+
"Full",
|
|
44
|
+
"Linspace",
|
|
45
|
+
"Ones",
|
|
46
|
+
"Zeros",
|
|
47
|
+
# Functions
|
|
48
|
+
"arange",
|
|
49
|
+
"diag",
|
|
50
|
+
"diagonal",
|
|
51
|
+
"empty",
|
|
52
|
+
"empty_like",
|
|
53
|
+
"eye",
|
|
54
|
+
"fromfunction",
|
|
55
|
+
"full",
|
|
56
|
+
"full_like",
|
|
57
|
+
"indices",
|
|
58
|
+
"linspace",
|
|
59
|
+
"meshgrid",
|
|
60
|
+
"ones",
|
|
61
|
+
"ones_like",
|
|
62
|
+
"pad",
|
|
63
|
+
"repeat",
|
|
64
|
+
"tile",
|
|
65
|
+
"to_backend",
|
|
66
|
+
"tri",
|
|
67
|
+
"wrap",
|
|
68
|
+
"wrap_func_shape_as_first_arg",
|
|
69
|
+
"zeros",
|
|
70
|
+
"zeros_like",
|
|
71
|
+
]
|