dask-array 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dask_array/__init__.py +228 -0
- dask_array/_backends.py +76 -0
- dask_array/_backends_array.py +99 -0
- dask_array/_blockwise.py +1410 -0
- dask_array/_broadcast.py +272 -0
- dask_array/_chunk.py +445 -0
- dask_array/_chunk_types.py +54 -0
- dask_array/_collection.py +1644 -0
- dask_array/_concatenate.py +331 -0
- dask_array/_core_utils.py +1365 -0
- dask_array/_dispatch.py +141 -0
- dask_array/_einsum.py +277 -0
- dask_array/_expr.py +544 -0
- dask_array/_expr_flow.py +586 -0
- dask_array/_gufunc.py +805 -0
- dask_array/_histogram.py +617 -0
- dask_array/_map_blocks.py +652 -0
- dask_array/_new_collection.py +10 -0
- dask_array/_numpy_compat.py +135 -0
- dask_array/_overlap.py +1159 -0
- dask_array/_rechunk.py +1050 -0
- dask_array/_reshape.py +710 -0
- dask_array/_routines.py +102 -0
- dask_array/_shuffle.py +448 -0
- dask_array/_stack.py +264 -0
- dask_array/_svg.py +291 -0
- dask_array/_templates.py +29 -0
- dask_array/_test_utils.py +257 -0
- dask_array/_ufunc.py +385 -0
- dask_array/_utils.py +349 -0
- dask_array/_visualize.py +223 -0
- dask_array/_xarray.py +337 -0
- dask_array/core/__init__.py +34 -0
- dask_array/core/_blockwise_funcs.py +312 -0
- dask_array/core/_conversion.py +422 -0
- dask_array/core/_from_graph.py +97 -0
- dask_array/creation/__init__.py +71 -0
- dask_array/creation/_arange.py +121 -0
- dask_array/creation/_diag.py +116 -0
- dask_array/creation/_diagonal.py +241 -0
- dask_array/creation/_eye.py +103 -0
- dask_array/creation/_linspace.py +102 -0
- dask_array/creation/_mesh.py +134 -0
- dask_array/creation/_ones_zeros.py +454 -0
- dask_array/creation/_pad.py +270 -0
- dask_array/creation/_repeat.py +55 -0
- dask_array/creation/_tile.py +36 -0
- dask_array/creation/_tri.py +28 -0
- dask_array/creation/_utils.py +296 -0
- dask_array/fft.py +320 -0
- dask_array/io/__init__.py +39 -0
- dask_array/io/_base.py +10 -0
- dask_array/io/_from_array.py +257 -0
- dask_array/io/_from_delayed.py +95 -0
- dask_array/io/_from_graph.py +54 -0
- dask_array/io/_from_npy_stack.py +67 -0
- dask_array/io/_store.py +336 -0
- dask_array/io/_tiledb.py +159 -0
- dask_array/io/_to_npy_stack.py +65 -0
- dask_array/io/_zarr.py +449 -0
- dask_array/linalg/__init__.py +39 -0
- dask_array/linalg/_cholesky.py +234 -0
- dask_array/linalg/_lu.py +300 -0
- dask_array/linalg/_norm.py +94 -0
- dask_array/linalg/_qr.py +601 -0
- dask_array/linalg/_solve.py +349 -0
- dask_array/linalg/_svd.py +394 -0
- dask_array/linalg/_tensordot.py +334 -0
- dask_array/linalg/_utils.py +74 -0
- dask_array/manipulation/__init__.py +45 -0
- dask_array/manipulation/_expand.py +321 -0
- dask_array/manipulation/_flip.py +92 -0
- dask_array/manipulation/_roll.py +78 -0
- dask_array/manipulation/_transpose.py +309 -0
- dask_array/random/__init__.py +125 -0
- dask_array/random/_choice.py +181 -0
- dask_array/random/_expr.py +256 -0
- dask_array/random/_generator.py +441 -0
- dask_array/random/_random_state.py +259 -0
- dask_array/random/_utils.py +84 -0
- dask_array/reductions/__init__.py +84 -0
- dask_array/reductions/_arg_reduction.py +130 -0
- dask_array/reductions/_common.py +1082 -0
- dask_array/reductions/_cumulative.py +522 -0
- dask_array/reductions/_percentile.py +261 -0
- dask_array/reductions/_reduction.py +725 -0
- dask_array/reductions/_trace.py +56 -0
- dask_array/routines/__init__.py +133 -0
- dask_array/routines/_apply.py +84 -0
- dask_array/routines/_bincount.py +112 -0
- dask_array/routines/_broadcast.py +111 -0
- dask_array/routines/_coarsen.py +115 -0
- dask_array/routines/_diff.py +79 -0
- dask_array/routines/_gradient.py +158 -0
- dask_array/routines/_indexing.py +65 -0
- dask_array/routines/_insert_delete.py +132 -0
- dask_array/routines/_misc.py +122 -0
- dask_array/routines/_nonzero.py +72 -0
- dask_array/routines/_search.py +123 -0
- dask_array/routines/_select.py +113 -0
- dask_array/routines/_statistics.py +171 -0
- dask_array/routines/_topk.py +82 -0
- dask_array/routines/_triangular.py +74 -0
- dask_array/routines/_unique.py +232 -0
- dask_array/routines/_where.py +62 -0
- dask_array/slicing/__init__.py +67 -0
- dask_array/slicing/_basic.py +550 -0
- dask_array/slicing/_blocks.py +138 -0
- dask_array/slicing/_bool_index.py +145 -0
- dask_array/slicing/_setitem.py +329 -0
- dask_array/slicing/_squeeze.py +101 -0
- dask_array/slicing/_utils.py +1133 -0
- dask_array/slicing/_vindex.py +282 -0
- dask_array/stacking/__init__.py +15 -0
- dask_array/stacking/_block.py +83 -0
- dask_array/stacking/_simple.py +58 -0
- dask_array/templates/array.html.j2 +48 -0
- dask_array/tests/__init__.py +0 -0
- dask_array/tests/conftest.py +22 -0
- dask_array/tests/test_api.py +40 -0
- dask_array/tests/test_binary_op_chunks.py +107 -0
- dask_array/tests/test_coarse_slice_through_blockwise.py +362 -0
- dask_array/tests/test_collection.py +799 -0
- dask_array/tests/test_creation.py +1102 -0
- dask_array/tests/test_expr_flow.py +143 -0
- dask_array/tests/test_linalg.py +1130 -0
- dask_array/tests/test_map_blocks_multi_output.py +104 -0
- dask_array/tests/test_rechunk_pushdown.py +214 -0
- dask_array/tests/test_reductions.py +1091 -0
- dask_array/tests/test_routines.py +2853 -0
- dask_array/tests/test_shuffle_chunks.py +67 -0
- dask_array/tests/test_slice_pushdown.py +968 -0
- dask_array/tests/test_slice_through_blockwise.py +678 -0
- dask_array/tests/test_slice_through_overlap.py +366 -0
- dask_array/tests/test_slice_through_reshape.py +272 -0
- dask_array/tests/test_slicing.py +839 -0
- dask_array/tests/test_transpose_slice_pushdown.py +208 -0
- dask_array/tests/test_visualize.py +94 -0
- dask_array/tests/test_xarray.py +193 -0
- dask_array-0.1.0.dist-info/METADATA +48 -0
- dask_array-0.1.0.dist-info/RECORD +144 -0
- dask_array-0.1.0.dist-info/WHEEL +4 -0
- dask_array-0.1.0.dist-info/entry_points.txt +2 -0
- dask_array-0.1.0.dist-info/licenses/LICENSE +29 -0
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import pickle
|
|
5
|
+
import uuid
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
|
|
9
|
+
from dask import core as dask_core
|
|
10
|
+
from dask.base import compute_as_if_collection
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def to_npy_stack(dirname, x, axis=0):
    """Write dask array to a stack of .npy files

    This partitions the dask.array along one axis and stores each block along
    that axis as a single .npy file in the specified directory

    Parameters
    ----------
    dirname : str or path-like
        Directory that receives the ``.npy`` files and the ``info`` file. It
        is created, together with any missing parent directories, when it
        does not exist yet.
    x : Array
        The dask array to write.
    axis : int, default 0
        Axis along which the array is split into files. A negative value that
        is within bounds counts from the last axis and is stored in ``info``
        in its normalised (non-negative) form. An axis outside the array's
        dimensions matches no dimension, so the whole array is written as a
        single block.

    Examples
    --------
    >>> x = da.ones((5, 10, 10), chunks=(2, 4, 4))  # doctest: +SKIP
    >>> da.to_npy_stack('data/', x, axis=0)  # doctest: +SKIP

    The ``.npy`` files store numpy arrays for ``x[0:2], x[2:4], and x[4:5]``
    respectively, as is specified by the chunk size along the zeroth axis::

        $ tree data/
        data/
        |-- 0.npy
        |-- 1.npy
        |-- 2.npy
        |-- info

    The ``info`` file stores the dtype, chunks, and axis information of the array.
    You can load these stacks with the :func:`dask.array.from_npy_stack` function.

    >>> y = da.from_npy_stack('data/')  # doctest: +SKIP

    See Also
    --------
    from_npy_stack
    """
    from dask_array._collection import Array, rechunk

    # Normalise an in-range negative axis; otherwise ``i == axis`` below never
    # matches and every dimension would be collapsed into a single chunk.
    ndim = len(x.chunks)
    if -ndim <= axis < 0:
        axis += ndim

    # Keep the chunking along ``axis``; merge every other axis into one chunk
    # so that each block along ``axis`` becomes exactly one file.
    chunks = tuple((c if i == axis else (sum(c),)) for i, c in enumerate(x.chunks))
    xx = rechunk(x, chunks)

    # ``exist_ok`` avoids the check-then-create race and nested paths work too.
    os.makedirs(dirname, exist_ok=True)

    meta = {"chunks": chunks, "dtype": x.dtype, "axis": axis}

    with open(os.path.join(dirname, "info"), "wb") as f:
        pickle.dump(meta, f)

    name = f"to-npy-stack-{uuid.uuid1()}"
    dsk = {
        (name, i): (np.save, os.path.join(dirname, f"{i}.npy"), key)
        for i, key in enumerate(dask_core.flatten(xx.__dask_keys__()))
    }

    # Merge the dependency graph with our new tasks
    full_dsk = dict(xx.__dask_graph__())
    full_dsk.update(dsk)
    compute_as_if_collection(Array, full_dsk, list(dsk))
|
dask_array/io/_zarr.py
ADDED
|
@@ -0,0 +1,449 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import warnings
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
|
|
8
|
+
from dask.base import tokenize
|
|
9
|
+
|
|
10
|
+
from dask_array._core_utils import normalize_chunks, unknown_chunk_message
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class PerformanceWarning(Warning):
    """Warning category used when a chunking choice is likely to perform poorly."""
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _zarr_v3() -> bool:
|
|
18
|
+
"""Check if zarr version is 3.x or higher."""
|
|
19
|
+
try:
|
|
20
|
+
import zarr
|
|
21
|
+
from packaging.version import Version
|
|
22
|
+
except ImportError:
|
|
23
|
+
return False
|
|
24
|
+
else:
|
|
25
|
+
return Version(zarr.__version__).major >= 3
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _setup_zarr_store(url: str, storage_options: dict[str, object] | None = None, **kwargs: object):
|
|
29
|
+
"""
|
|
30
|
+
Set up a Zarr store for reading or writing, handling both Zarr v2 and v3.
|
|
31
|
+
|
|
32
|
+
This function prepares a Zarr-compatible storage object (`store`) from a URL or existing
|
|
33
|
+
store. It supports optional storage options for fsspec-based stores and automatically
|
|
34
|
+
selects the appropriate store type depending on the Zarr version.
|
|
35
|
+
|
|
36
|
+
Parameters
|
|
37
|
+
----------
|
|
38
|
+
url: Zarr Array or str or MutableMapping
|
|
39
|
+
Location of the data. A URL can include a protocol specifier like s3://
|
|
40
|
+
for remote data. Can also be any MutableMapping instance, which should
|
|
41
|
+
be serializable if used in multiple processes.
|
|
42
|
+
storage_options: dict | None, default = None
|
|
43
|
+
Any additional parameters for the storage backend (ignored for local
|
|
44
|
+
paths)
|
|
45
|
+
**kwargs:
|
|
46
|
+
Passed to determine whether the store should be readonly by evaluating the following:
|
|
47
|
+
'kwargs.pop("read_only", kwargs.pop("mode", "a") == "r")'
|
|
48
|
+
|
|
49
|
+
Returns
|
|
50
|
+
-------
|
|
51
|
+
store : zarr.store.Store or original url
|
|
52
|
+
A Zarr-compatible store object. Can be:
|
|
53
|
+
- `zarr.storage.FsspecStore` for Zarr v3 with storage options
|
|
54
|
+
- `zarr.storage.FSStore` for Zarr v2 with storage options
|
|
55
|
+
- The original URL/path if no storage options are provided
|
|
56
|
+
"""
|
|
57
|
+
# Cannot directly import FSStore from storage.
|
|
58
|
+
from zarr import storage
|
|
59
|
+
|
|
60
|
+
if storage_options is not None:
|
|
61
|
+
if _zarr_v3():
|
|
62
|
+
read_only = kwargs.pop("read_only", kwargs.pop("mode", "a") == "r")
|
|
63
|
+
store = storage.FsspecStore.from_url(url, read_only=read_only, storage_options=storage_options)
|
|
64
|
+
else:
|
|
65
|
+
store = storage.FSStore(url, **storage_options)
|
|
66
|
+
else:
|
|
67
|
+
store = url
|
|
68
|
+
return store
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def from_zarr(
    url,
    component=None,
    storage_options=None,
    chunks=None,
    name=None,
    inline_array=False,
    **kwargs,
):
    """Load array from the zarr storage format

    See https://zarr.readthedocs.io for details about the format.

    Parameters
    ----------
    url: Zarr Array or str or MutableMapping
        Location of the data. A URL can include a protocol specifier like s3://
        for remote data. Can also be any MutableMapping instance, which should
        be serializable if used in multiple processes. A ``zarr.Array`` is used
        as-is.
    component: str or None
        If the location is a zarr group rather than an array, this is the
        subcomponent that should be loaded, something like ``'foo/bar'``.
    storage_options: dict
        Any additional parameters for the storage backend. ``None`` is replaced
        by an empty dict before the store is set up.
    chunks: tuple of ints or tuples of ints
        Passed to :func:`dask_array.from_array`, allows setting the chunks on
        initialisation, if the chunking scheme in the on-disc dataset is not
        optimal for the calculations to follow. Defaults to the zarr array's
        own ``chunks``.
    name : str, optional
        An optional keyname for the array. Defaults to ``"from-zarr-"``
        followed by a token of the opened array, ``component``,
        ``storage_options``, ``chunks`` and ``kwargs``.
    inline_array : bool, default False
        Whether to inline the zarr Array in the values of the task graph.
        See :meth:`dask_array.from_array` for an explanation.
    kwargs:
        Passed to ``zarr.open_array`` (and to the store setup when ``url`` is
        a string or path).

    See Also
    --------
    from_array
    """
    import zarr

    from dask_array.core import from_array

    storage_options = storage_options or {}

    if isinstance(url, zarr.Array):
        # Already an opened array: nothing to open.
        z = url
    else:
        if isinstance(url, (str, os.PathLike)):
            # ``os.fspath`` leaves plain strings untouched and unwraps paths.
            store = _setup_zarr_store(os.fspath(url), storage_options, **kwargs)
        else:
            store = url
        z = zarr.open_array(store=store, path=component, **kwargs)

    if chunks is None:
        chunks = z.chunks
    if name is None:
        name = "from-zarr-" + tokenize(z, component, storage_options, chunks, **kwargs)

    return from_array(z, chunks, name=name, inline_array=inline_array)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _get_zarr_write_chunks(zarr_array) -> tuple[int, ...]:
|
|
134
|
+
"""Get the appropriate chunk shape for writing to a Zarr array.
|
|
135
|
+
|
|
136
|
+
For Zarr v3 arrays with sharding, returns the shard shape.
|
|
137
|
+
For arrays without sharding, returns the chunk shape.
|
|
138
|
+
For Zarr v2 arrays, returns the chunk shape.
|
|
139
|
+
|
|
140
|
+
Parameters
|
|
141
|
+
----------
|
|
142
|
+
zarr_array : zarr.Array
|
|
143
|
+
The target zarr array
|
|
144
|
+
|
|
145
|
+
Returns
|
|
146
|
+
-------
|
|
147
|
+
tuple
|
|
148
|
+
The chunk shape to use for rechunking the dask array
|
|
149
|
+
"""
|
|
150
|
+
# Zarr V3 array with shards
|
|
151
|
+
if hasattr(zarr_array, "shards") and zarr_array.shards is not None:
|
|
152
|
+
return zarr_array.shards
|
|
153
|
+
# Zarr V3 array without shards, or Zarr V2 array
|
|
154
|
+
return zarr_array.chunks
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def _check_regular_chunks(chunkset):
|
|
158
|
+
"""Check if the chunks are regular
|
|
159
|
+
|
|
160
|
+
"Regular" in this context means that along every axis, the chunks all
|
|
161
|
+
have the same size, except the last one, which may be smaller
|
|
162
|
+
|
|
163
|
+
Parameters
|
|
164
|
+
----------
|
|
165
|
+
chunkset: tuple of tuples of ints
|
|
166
|
+
From the ``.chunks`` attribute of an ``Array``
|
|
167
|
+
|
|
168
|
+
Returns
|
|
169
|
+
-------
|
|
170
|
+
True if chunkset passes, else False
|
|
171
|
+
|
|
172
|
+
Examples
|
|
173
|
+
--------
|
|
174
|
+
>>> _check_regular_chunks(((5, 5),))
|
|
175
|
+
True
|
|
176
|
+
|
|
177
|
+
>>> _check_regular_chunks(((3, 3, 3, 1),))
|
|
178
|
+
True
|
|
179
|
+
|
|
180
|
+
>>> _check_regular_chunks(((3, 1, 3, 3),))
|
|
181
|
+
False
|
|
182
|
+
"""
|
|
183
|
+
for chunks in chunkset:
|
|
184
|
+
if len(chunks) == 1:
|
|
185
|
+
continue
|
|
186
|
+
if len(set(chunks[:-1])) > 1:
|
|
187
|
+
return False
|
|
188
|
+
if chunks[-1] > chunks[0]:
|
|
189
|
+
return False
|
|
190
|
+
return True
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def _write_dask_to_existing_zarr(url, arr, region, zarr_mem_store_types, compute, return_stored):
    """Store a dask array into an already-created zarr array.

    The dask array is rechunked to chunks derived from the zarr array's write
    chunks (shards when present) before being stored without a lock.

    Parameters
    ----------
    url: zarr.Array
        The zarr array.
    arr:
        The dask array to be stored
    region: tuple of slices or None
        The region of data that should be written if ``url`` is a zarr.Array.
        Not to be used with other types of ``url``.
    zarr_mem_store_types: tuple[Type[dict] | Type[zarr.storage.MemoryStore] | Type[zarr.storage.KVStore], ...]
        Store types regarded as in-memory; writing to one of them while a
        distributed client is active is refused.
    compute: bool
        See :func:`~dask_array.store` for more details.
    return_stored: bool
        See :func:`~dask_array.store` for more details.

    Returns
    -------
    If return_stored=True
        tuple of Arrays
    If return_stored=False and compute=True
        None
    If return_stored=False and compute=False
        Delayed

    Raises
    ------
    RuntimeError
        If the target uses an in-memory store and a distributed default client
        exists.
    """
    from dask_array.slicing._utils import new_blockdim, normalize_index

    z = url

    if isinstance(z.store, zarr_mem_store_types):
        # Probe for an active distributed client; absence of ``distributed``
        # or of a client (ValueError) means the in-memory target is fine.
        try:
            from distributed import default_client

            default_client()
        except (ImportError, ValueError):
            pass
        else:
            raise RuntimeError("Cannot store into in memory Zarr Array using the distributed scheduler.")

    zarr_write_chunks = _get_zarr_write_chunks(z)
    dask_write_chunks = normalize_chunks(
        chunks="auto",
        shape=z.shape,
        dtype=z.dtype,
        previous_chunks=zarr_write_chunks,
    )

    if region is not None:
        # Restrict the per-axis chunking to the part selected by ``region``.
        index = normalize_index(region, z.shape)
        dask_write_chunks = tuple(
            tuple(new_blockdim(dim_len, dim_chunks, dim_index))
            for dim_len, dim_chunks, dim_index in zip(z.shape, dask_write_chunks, index)
        )

    for ax, (axis_chunks, zw) in enumerate(zip(dask_write_chunks, zarr_write_chunks, strict=True)):
        if not axis_chunks:
            continue
        nominal_dask_chunk_size = axis_chunks[0]
        if nominal_dask_chunk_size % zw == 0:
            continue

        safe_chunk_size = np.prod(zarr_write_chunks) * max(1, z.dtype.itemsize)
        msg = (
            f"The input Dask array will be rechunked along axis {ax} with chunk size "
            f"{nominal_dask_chunk_size}, but a chunk size divisible by {zw} is "
            f"required for Dask to write safely to the Zarr array {z}. "
            "To avoid risk of data loss when writing to this Zarr array, set the "
            '"array.chunk-size" configuration parameter to at least the size in'
            " bytes of a single on-disk "
            f"chunk (or shard) of the Zarr array, which in this case is "
            f"{safe_chunk_size} bytes. "
            f'E.g., dask.config.set({{"array.chunk-size": {safe_chunk_size}}})'
        )
        warnings.warn(msg, PerformanceWarning, stacklevel=3)
        # One warning is enough; stop at the first misaligned axis.
        break

    arr = arr.rechunk(dask_write_chunks)

    regions = None if region is None else [region]
    return arr.store(z, lock=False, regions=regions, compute=compute, return_stored=return_stored)
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
def to_zarr(
    arr,
    url,
    component=None,
    storage_options=None,
    region=None,
    compute=True,
    return_stored=False,
    zarr_array_kwargs=None,
    zarr_read_kwargs=None,
    **kwargs,
):
    """Save array to the zarr storage format

    See https://zarr.readthedocs.io for details about the format.

    Parameters
    ----------
    arr: dask.array
        Data to store
    url: Zarr Array or str or MutableMapping
        Location of the data. A URL can include a protocol specifier like s3://
        for remote data. Can also be any MutableMapping instance, which should
        be serializable if used in multiple processes.
    component: str or None
        If the location is a zarr group rather than an array, this is the
        subcomponent that should be created/over-written. It cannot be
        combined with a 'name' entry in `zarr_array_kwargs`; supplying both
        raises ``ValueError``.
    storage_options: dict
        Any additional parameters for the storage backend. When left as
        ``None`` the ``url`` is handed to zarr unchanged.
    region: tuple of slices or None
        The region of data that should be written if ``url`` is a zarr.Array.
        Not to be used with other types of ``url``.
    compute: bool
        See :func:`~dask_array.store` for more details.
    return_stored: bool
        See :func:`~dask_array.store` for more details.
    zarr_array_kwargs: dict or None
        Keyword arguments passed to :func:`zarr.create_array` (for zarr v3) or
        :func:`zarr.create` (for zarr v2). This function automatically sets
        ``shape``, ``chunks``, and ``dtype`` based on the dask array, but these
        can be overridden.

        Common options include:

        - ``compressor``: Compression algorithm (e.g., ``zarr.Blosc()``)
        - ``filters``: List of filters to apply
        - ``fill_value``: Value to use for uninitialized portions
        - ``order``: Memory layout ('C' or 'F')
        - ``dimension_separator``: Separator for chunk keys ('/' or '.')

        For the complete list of available arguments, see the zarr documentation:

        - zarr v3: https://zarr.readthedocs.io/en/stable/api/zarr/index.html#zarr.create_array
        - zarr v2: https://zarr.readthedocs.io/en/stable/api/core.html#zarr.create
    zarr_read_kwargs: dict or None
        Keyword arguments passed to the storage backend when creating a zarr
        store from a URL string. Only used when ``url`` is a string (not when
        ``url`` is already a zarr.Array or MutableMapping instance).

        Common options include:

        - ``mode``: File access mode. Options include:
            - ``'r'``: Read-only, must exist
            - ``'r+'``: Read/write, must exist
            - ``'a'``: Read/write, create if doesn't exist (default)
            - ``'w'``: Create, remove existing data if present
            - ``'w-'``: Create, fail if exists
        - ``read_only``: If True, open the store in read-only mode (alternative to ``mode='r'``)

        Additional backend-specific options may be available depending on the
        storage system (e.g., fsspec parameters for cloud storage).
    **kwargs:
        .. deprecated:: 2025.12.0
            Passing storage-related arguments via **kwargs is deprecated.
            Please use the ``zarr_read_kwargs`` parameter instead.

        Any keyword given here is merged into ``zarr_read_kwargs`` (explicit
        ``zarr_read_kwargs`` entries win) and a ``FutureWarning`` is emitted.
        Array-creation options such as ``overwrite`` are not read from here;
        NOTE(review): they presumably belong in ``zarr_array_kwargs`` - confirm
        against the zarr version in use.

    Returns
    -------
    The result of ``arr.store`` for the given ``compute`` and
    ``return_stored`` flags; see :func:`~dask_array.store`.

    Raises
    ------
    ValueError
        If ``arr`` has unknown chunk sizes, which is not supported by Zarr.
        If ``region`` is specified and ``url`` is not a zarr.Array
        If both ``component`` and ``zarr_array_kwargs['name']`` are given.

    See Also
    --------
    dask_array.store
    dask_array.Array.compute_chunk_sizes

    """
    import zarr

    if np.isnan(arr.shape).any():
        raise ValueError(
            f"Saving a dask array with unknown chunk sizes is not currently supported by Zarr.{unknown_chunk_message}"
        )

    # Work on a copy so the caller's dict is never mutated by setdefault/pop.
    zarr_array_kwargs = {} if zarr_array_kwargs is None else dict(zarr_array_kwargs)
    if component is not None and "name" in zarr_array_kwargs:
        raise ValueError(
            "Cannot specify both 'component' and 'name' in zarr_array_kwargs. Please use 'name' in zarr_array_kwargs"
        )

    if kwargs:
        # The replacement parameter is ``zarr_read_kwargs``; the message used
        # to point at a non-existent ``zarr_store_kwargs``.
        warnings.warn(
            "Passing storage-related arguments via **kwargs is deprecated. "
            "Please use the 'zarr_read_kwargs' parameter instead. **kwargs will be "
            "removed in a future version.",
            FutureWarning,
            stacklevel=2,
        )
        if zarr_read_kwargs is None:
            zarr_read_kwargs = kwargs
        else:
            # Explicit zarr_read_kwargs take priority over legacy kwargs.
            zarr_read_kwargs = {**kwargs, **zarr_read_kwargs}

    if _zarr_v3():
        zarr_mem_store_types = (zarr.storage.MemoryStore,)
    else:
        zarr_mem_store_types = (dict, zarr.storage.MemoryStore, zarr.storage.KVStore)

    if isinstance(url, zarr.Array):
        return _write_dask_to_existing_zarr(url, arr, region, zarr_mem_store_types, compute, return_stored)

    if not _check_regular_chunks(arr.chunks):
        warnings.warn(
            "The array uses irregular chunk sizes. Rechunking to regular (uniform) chunks "
            "to ensure the data can be written safely. If you want to avoid this automatic "
            "rechunking, manually rechunk the array so that all chunks, except possibly the "
            "final chunk, in each dimension—have the same size (e.g., arr = arr.rechunk(...)).",
            UserWarning,
            stacklevel=2,
        )
        # We almost certainly get here because auto chunking has been used
        # on irregular chunks. The max will then be smaller than auto, so using
        # max is a safe choice
        arr = arr.rechunk(tuple(map(max, arr.chunks)))

    if region is not None:
        raise ValueError("Cannot use `region` keyword when url is not a `zarr.Array`.")

    zarr_read_kwargs = {} if zarr_read_kwargs is None else dict(zarr_read_kwargs)
    zarr_store = _setup_zarr_store(url, storage_options, **zarr_read_kwargs)

    # Defaults derived from the dask array; caller-supplied values win.
    zarr_array_kwargs.setdefault("shape", arr.shape)
    zarr_array_kwargs.setdefault("chunks", tuple(c[0] for c in arr.chunks))
    zarr_array_kwargs.setdefault("dtype", arr.dtype)

    array_name = component or zarr_array_kwargs.pop("name", None)
    if _zarr_v3():
        if array_name:
            # Named arrays are created inside the (possibly new) root group.
            root = zarr.open_group(store=zarr_store, mode="a")
            z = root.create_array(name=array_name, **zarr_array_kwargs)
        else:
            zarr_array_kwargs["store"] = zarr_store
            z = zarr.create_array(**zarr_array_kwargs)
    else:
        # TODO: drop this as soon as zarr v2 gets dropped.
        # https://github.com/dask/dask/issues/12188
        z = zarr.create(
            store=zarr_store,
            path=array_name,
            **zarr_array_kwargs,
        )

    return arr.store(z, lock=False, compute=compute, return_stored=return_stored)
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""Linear algebra submodule for array-expr.
|
|
2
|
+
|
|
3
|
+
This module provides native expression-based implementations of linear algebra
|
|
4
|
+
operations for the array-expr system.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from dask_array.linalg._cholesky import cholesky
|
|
8
|
+
from dask_array.linalg._lu import lu
|
|
9
|
+
from dask_array.linalg._norm import norm
|
|
10
|
+
from dask_array.linalg._qr import qr, sfqr, tsqr
|
|
11
|
+
from dask_array.linalg._solve import inv, lstsq, solve, solve_triangular
|
|
12
|
+
from dask_array.linalg._svd import (
|
|
13
|
+
compression_level,
|
|
14
|
+
compression_matrix,
|
|
15
|
+
svd,
|
|
16
|
+
svd_compressed,
|
|
17
|
+
)
|
|
18
|
+
from dask_array.linalg._tensordot import dot, matmul, tensordot, vdot
|
|
19
|
+
|
|
20
|
+
__all__ = [
|
|
21
|
+
"cholesky",
|
|
22
|
+
"compression_level",
|
|
23
|
+
"compression_matrix",
|
|
24
|
+
"dot",
|
|
25
|
+
"inv",
|
|
26
|
+
"lstsq",
|
|
27
|
+
"lu",
|
|
28
|
+
"matmul",
|
|
29
|
+
"norm",
|
|
30
|
+
"qr",
|
|
31
|
+
"sfqr",
|
|
32
|
+
"solve",
|
|
33
|
+
"solve_triangular",
|
|
34
|
+
"svd",
|
|
35
|
+
"svd_compressed",
|
|
36
|
+
"tensordot",
|
|
37
|
+
"tsqr",
|
|
38
|
+
"vdot",
|
|
39
|
+
]
|