dask-array 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144) hide show
  1. dask_array/__init__.py +228 -0
  2. dask_array/_backends.py +76 -0
  3. dask_array/_backends_array.py +99 -0
  4. dask_array/_blockwise.py +1410 -0
  5. dask_array/_broadcast.py +272 -0
  6. dask_array/_chunk.py +445 -0
  7. dask_array/_chunk_types.py +54 -0
  8. dask_array/_collection.py +1644 -0
  9. dask_array/_concatenate.py +331 -0
  10. dask_array/_core_utils.py +1365 -0
  11. dask_array/_dispatch.py +141 -0
  12. dask_array/_einsum.py +277 -0
  13. dask_array/_expr.py +544 -0
  14. dask_array/_expr_flow.py +586 -0
  15. dask_array/_gufunc.py +805 -0
  16. dask_array/_histogram.py +617 -0
  17. dask_array/_map_blocks.py +652 -0
  18. dask_array/_new_collection.py +10 -0
  19. dask_array/_numpy_compat.py +135 -0
  20. dask_array/_overlap.py +1159 -0
  21. dask_array/_rechunk.py +1050 -0
  22. dask_array/_reshape.py +710 -0
  23. dask_array/_routines.py +102 -0
  24. dask_array/_shuffle.py +448 -0
  25. dask_array/_stack.py +264 -0
  26. dask_array/_svg.py +291 -0
  27. dask_array/_templates.py +29 -0
  28. dask_array/_test_utils.py +257 -0
  29. dask_array/_ufunc.py +385 -0
  30. dask_array/_utils.py +349 -0
  31. dask_array/_visualize.py +223 -0
  32. dask_array/_xarray.py +337 -0
  33. dask_array/core/__init__.py +34 -0
  34. dask_array/core/_blockwise_funcs.py +312 -0
  35. dask_array/core/_conversion.py +422 -0
  36. dask_array/core/_from_graph.py +97 -0
  37. dask_array/creation/__init__.py +71 -0
  38. dask_array/creation/_arange.py +121 -0
  39. dask_array/creation/_diag.py +116 -0
  40. dask_array/creation/_diagonal.py +241 -0
  41. dask_array/creation/_eye.py +103 -0
  42. dask_array/creation/_linspace.py +102 -0
  43. dask_array/creation/_mesh.py +134 -0
  44. dask_array/creation/_ones_zeros.py +454 -0
  45. dask_array/creation/_pad.py +270 -0
  46. dask_array/creation/_repeat.py +55 -0
  47. dask_array/creation/_tile.py +36 -0
  48. dask_array/creation/_tri.py +28 -0
  49. dask_array/creation/_utils.py +296 -0
  50. dask_array/fft.py +320 -0
  51. dask_array/io/__init__.py +39 -0
  52. dask_array/io/_base.py +10 -0
  53. dask_array/io/_from_array.py +257 -0
  54. dask_array/io/_from_delayed.py +95 -0
  55. dask_array/io/_from_graph.py +54 -0
  56. dask_array/io/_from_npy_stack.py +67 -0
  57. dask_array/io/_store.py +336 -0
  58. dask_array/io/_tiledb.py +159 -0
  59. dask_array/io/_to_npy_stack.py +65 -0
  60. dask_array/io/_zarr.py +449 -0
  61. dask_array/linalg/__init__.py +39 -0
  62. dask_array/linalg/_cholesky.py +234 -0
  63. dask_array/linalg/_lu.py +300 -0
  64. dask_array/linalg/_norm.py +94 -0
  65. dask_array/linalg/_qr.py +601 -0
  66. dask_array/linalg/_solve.py +349 -0
  67. dask_array/linalg/_svd.py +394 -0
  68. dask_array/linalg/_tensordot.py +334 -0
  69. dask_array/linalg/_utils.py +74 -0
  70. dask_array/manipulation/__init__.py +45 -0
  71. dask_array/manipulation/_expand.py +321 -0
  72. dask_array/manipulation/_flip.py +92 -0
  73. dask_array/manipulation/_roll.py +78 -0
  74. dask_array/manipulation/_transpose.py +309 -0
  75. dask_array/random/__init__.py +125 -0
  76. dask_array/random/_choice.py +181 -0
  77. dask_array/random/_expr.py +256 -0
  78. dask_array/random/_generator.py +441 -0
  79. dask_array/random/_random_state.py +259 -0
  80. dask_array/random/_utils.py +84 -0
  81. dask_array/reductions/__init__.py +84 -0
  82. dask_array/reductions/_arg_reduction.py +130 -0
  83. dask_array/reductions/_common.py +1082 -0
  84. dask_array/reductions/_cumulative.py +522 -0
  85. dask_array/reductions/_percentile.py +261 -0
  86. dask_array/reductions/_reduction.py +725 -0
  87. dask_array/reductions/_trace.py +56 -0
  88. dask_array/routines/__init__.py +133 -0
  89. dask_array/routines/_apply.py +84 -0
  90. dask_array/routines/_bincount.py +112 -0
  91. dask_array/routines/_broadcast.py +111 -0
  92. dask_array/routines/_coarsen.py +115 -0
  93. dask_array/routines/_diff.py +79 -0
  94. dask_array/routines/_gradient.py +158 -0
  95. dask_array/routines/_indexing.py +65 -0
  96. dask_array/routines/_insert_delete.py +132 -0
  97. dask_array/routines/_misc.py +122 -0
  98. dask_array/routines/_nonzero.py +72 -0
  99. dask_array/routines/_search.py +123 -0
  100. dask_array/routines/_select.py +113 -0
  101. dask_array/routines/_statistics.py +171 -0
  102. dask_array/routines/_topk.py +82 -0
  103. dask_array/routines/_triangular.py +74 -0
  104. dask_array/routines/_unique.py +232 -0
  105. dask_array/routines/_where.py +62 -0
  106. dask_array/slicing/__init__.py +67 -0
  107. dask_array/slicing/_basic.py +550 -0
  108. dask_array/slicing/_blocks.py +138 -0
  109. dask_array/slicing/_bool_index.py +145 -0
  110. dask_array/slicing/_setitem.py +329 -0
  111. dask_array/slicing/_squeeze.py +101 -0
  112. dask_array/slicing/_utils.py +1133 -0
  113. dask_array/slicing/_vindex.py +282 -0
  114. dask_array/stacking/__init__.py +15 -0
  115. dask_array/stacking/_block.py +83 -0
  116. dask_array/stacking/_simple.py +58 -0
  117. dask_array/templates/array.html.j2 +48 -0
  118. dask_array/tests/__init__.py +0 -0
  119. dask_array/tests/conftest.py +22 -0
  120. dask_array/tests/test_api.py +40 -0
  121. dask_array/tests/test_binary_op_chunks.py +107 -0
  122. dask_array/tests/test_coarse_slice_through_blockwise.py +362 -0
  123. dask_array/tests/test_collection.py +799 -0
  124. dask_array/tests/test_creation.py +1102 -0
  125. dask_array/tests/test_expr_flow.py +143 -0
  126. dask_array/tests/test_linalg.py +1130 -0
  127. dask_array/tests/test_map_blocks_multi_output.py +104 -0
  128. dask_array/tests/test_rechunk_pushdown.py +214 -0
  129. dask_array/tests/test_reductions.py +1091 -0
  130. dask_array/tests/test_routines.py +2853 -0
  131. dask_array/tests/test_shuffle_chunks.py +67 -0
  132. dask_array/tests/test_slice_pushdown.py +968 -0
  133. dask_array/tests/test_slice_through_blockwise.py +678 -0
  134. dask_array/tests/test_slice_through_overlap.py +366 -0
  135. dask_array/tests/test_slice_through_reshape.py +272 -0
  136. dask_array/tests/test_slicing.py +839 -0
  137. dask_array/tests/test_transpose_slice_pushdown.py +208 -0
  138. dask_array/tests/test_visualize.py +94 -0
  139. dask_array/tests/test_xarray.py +193 -0
  140. dask_array-0.1.0.dist-info/METADATA +48 -0
  141. dask_array-0.1.0.dist-info/RECORD +144 -0
  142. dask_array-0.1.0.dist-info/WHEEL +4 -0
  143. dask_array-0.1.0.dist-info/entry_points.txt +2 -0
  144. dask_array-0.1.0.dist-info/licenses/LICENSE +29 -0
@@ -0,0 +1,95 @@
1
+ from __future__ import annotations
2
+
3
+ import functools
4
+ from typing import TYPE_CHECKING
5
+
6
+ import numpy as np
7
+
8
+ from dask_array.io._base import IO
9
+ from dask_array._utils import meta_from_array
10
+
11
+ if TYPE_CHECKING:
12
+ pass
13
+
14
+
15
class FromDelayed(IO):
    """Single-chunk array expression backed by one delayed value.

    Operands
    --------
    value
        Object exposing a ``key``; its graph is merged in when it is a dask
        collection.
    shape
        Shape of the resulting array; also fixes the (single) chunk size.
    dtype, _meta
        Optional hints used to build the array metadata.
    _name_prefix
        Optional explicit name; when falsy a token-based name is generated.
    """

    _parameters = ["value", "shape", "dtype", "_meta", "_name_prefix"]
    _defaults = {"dtype": None, "_meta": None, "_name_prefix": None}

    @functools.cached_property
    def _meta(self):
        """Zero-size array describing the chunk type and dtype."""
        user_meta = self.operand("_meta")
        requested_dtype = self.operand("dtype")

        if user_meta is not None:
            # Fall back to the dtype carried by the user-supplied meta.
            if requested_dtype is None:
                requested_dtype = getattr(user_meta, "dtype", None)
            return meta_from_array(user_meta, dtype=requested_dtype)

        empty_shape = (0,) * len(self.operand("shape"))
        if requested_dtype is None:
            return np.empty(empty_shape)
        return np.empty(empty_shape, dtype=requested_dtype)

    @functools.cached_property
    def chunks(self):
        """One chunk per dimension, spanning the whole extent."""
        # zip over a single iterable yields 1-tuples: (d0,), (d1,), ...
        return tuple(zip(self.operand("shape")))

    @functools.cached_property
    def _name(self):
        """Explicit name prefix if given, otherwise a token-based name."""
        return self.operand("_name_prefix") or "from-delayed-" + self.deterministic_token

    def _layer(self):
        """Build the graph: an alias to the delayed key plus its own graph."""
        from dask._task_spec import Alias
        from dask.base import is_dask_collection

        delayed_value = self.operand("value")
        block_index = (0,) * len(self.operand("shape"))
        out_key = (self._name, *block_index)

        graph = {out_key: Alias(key=out_key, target=delayed_value.key)}
        # Pull in the tasks that actually compute the delayed value.
        if is_dask_collection(delayed_value):
            graph.update(delayed_value.__dask_graph__())
        return graph
58
+
59
+
60
def from_delayed(value, shape, dtype=None, meta=None, name=None):
    """Create a dask array from a dask delayed value

    Handy for assembling dask arrays ad hoc from dask delayed objects,
    especially together with stack and concatenate.

    The resulting dask array has exactly one chunk.

    Parameters
    ----------
    value
        A ``Delayed``, or any object with a ``key`` attribute (which is
        wrapped with ``dask.delayed.delayed`` first).
    shape
        Shape of the resulting array.
    dtype, meta, name
        Optional dtype, meta array and name forwarded to the expression.

    Examples
    --------
    >>> import dask
    >>> import dask_array as da
    >>> import numpy as np
    >>> value = dask.delayed(np.ones)(5)
    >>> array = da.from_delayed(value, (5,), dtype=float)
    >>> array
    dask.array<from-value, shape=(5,), dtype=float64, chunksize=(5,), chunktype=numpy.ndarray>
    >>> array.compute()
    array([1., 1., 1., 1., 1.])
    """
    from dask_array._new_collection import new_collection
    from dask.delayed import Delayed, delayed

    # Key-bearing objects that are not Delayed get wrapped so they expose
    # the Delayed interface downstream.
    already_delayed = isinstance(value, Delayed)
    if not already_delayed and hasattr(value, "key"):
        value = delayed(value)

    expr = FromDelayed(
        value=value,
        shape=shape,
        dtype=dtype,
        _meta=meta,
        _name_prefix=name,
    )
    return new_collection(expr)
@@ -0,0 +1,54 @@
1
+ from __future__ import annotations
2
+
3
+ import functools
4
+
5
+ from dask import istask
6
+ from dask_array._expr import ArrayExpr
7
+
8
+
9
class FromGraph(ArrayExpr):
    """Array expression wrapping an already-built graph layer.

    The ``layer`` operand is either a plain mapping or an object exposing
    ``layers``; ``keys`` lists the output keys of that graph, which are
    re-exposed under this expression's own name.
    """

    _parameters = ["layer", "_meta", "chunks", "keys", "name_prefix", "_dependencies"]
    _defaults = {"_dependencies": ()}

    @functools.cached_property
    def _meta(self):
        """Meta array exactly as supplied by the caller."""
        return self.operand("_meta")

    @functools.cached_property
    def chunks(self):
        """Chunk structure exactly as supplied by the caller."""
        return self.operand("chunks")

    @functools.cached_property
    def _name(self):
        """``<name_prefix>-<token>``."""
        return "-".join((self.operand("name_prefix"), self.deterministic_token))

    def dependencies(self):
        """Return the explicitly supplied dependencies as a fresh list."""
        return [*self.operand("_dependencies")]

    def _layer(self):
        """Return the wrapped graph with output keys exposed under ``_name``."""
        graph = self.operand("layer")
        output_keys = set(self.operand("keys"))

        def renamed(key):
            # Swap the leading name component, keep the block index.
            return (self._name, *key[1:])

        # Persist case: a plain dict of computed values whose tuple keys may
        # no longer match ours (optimization can rename keys). Rename only.
        if not hasattr(graph, "layers"):
            tuple_keys = {key for key in graph if isinstance(key, tuple)}
            if tuple_keys and tuple_keys.isdisjoint(output_keys):
                relabeled = {}
                for key, value in graph.items():
                    target = renamed(key) if isinstance(key, tuple) else key
                    relabeled[target] = value
                return relabeled

        # Otherwise (e.g. an HLG from BlockView): the graph holds tasks and
        # their dependencies. Rename outputs, keep everything else intact.
        out = {}
        for key, value in dict(graph).items():
            if key not in output_keys:
                out[key] = value  # dependency, untouched
                continue
            exposed = renamed(key)
            if istask(value):
                out[exposed] = key  # alias pointing at the original key
                out[key] = value  # original task stays in the graph
            else:
                out[exposed] = value  # plain value, just renamed
        return out
@@ -0,0 +1,67 @@
1
+ from __future__ import annotations
2
+
3
+ import functools
4
+ import os
5
+ import pickle
6
+ from itertools import product
7
+
8
+ import numpy as np
9
+
10
+ from dask_array.io._base import IO
11
+
12
+
13
class FromNpyStack(IO):
    """Array expression that reads a directory of ``<i>.npy`` files.

    The directory must contain a pickled ``info`` file providing the
    ``chunks``, ``dtype`` and ``axis`` entries read below.
    """

    _parameters = ["dirname", "mmap_mode"]
    _defaults = {"mmap_mode": "r"}

    @functools.cached_property
    def _info(self):
        """Unpickle the ``info`` file once and cache the result."""
        info_path = os.path.join(self.operand("dirname"), "info")
        # NOTE: unpickling can execute arbitrary code; only point this at
        # directories whose contents are trusted.
        with open(info_path, "rb") as handle:
            return pickle.load(handle)

    @functools.cached_property
    def _meta(self):
        """Zero-size array with the stored dimensionality and dtype."""
        info = self._info
        ndim = len(info["chunks"])
        return np.empty((0,) * ndim, dtype=info["dtype"])

    @functools.cached_property
    def chunks(self):
        """Chunk structure recorded in the ``info`` file."""
        return self._info["chunks"]

    @functools.cached_property
    def _name(self):
        return "from-npy-stack-" + self.deterministic_token

    def _layer(self):
        """Map each block key to an ``np.load`` task for one ``.npy`` file."""
        dirname = self.operand("dirname")
        mmap_mode = self.operand("mmap_mode")
        info = self._info
        chunks, axis = info["chunks"], info["axis"]

        block_ranges = [range(len(dim_chunks)) for dim_chunks in chunks]
        block_keys = product([self._name], *block_ranges)
        # One file per chunk along the stacking axis.
        load_tasks = (
            (np.load, os.path.join(dirname, f"{i}.npy"), mmap_mode)
            for i in range(len(chunks[axis]))
        )
        return dict(zip(block_keys, load_tasks))
49
+
50
+
51
def from_npy_stack(dirname, mmap_mode="r"):
    """Load dask array from stack of npy files

    Parameters
    ----------
    dirname: string
        Directory holding the ``.npy`` files and their ``info`` file
    mmap_mode: (None or 'r')
        Memory-map mode handed to ``np.load`` for each file

    See Also
    --------
    to_npy_stack
    """
    from dask_array._new_collection import new_collection

    expr = FromNpyStack(dirname=dirname, mmap_mode=mmap_mode)
    return new_collection(expr)
@@ -0,0 +1,336 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Collection
4
+ from threading import Lock
5
+ from typing import TYPE_CHECKING, Any, TypeVar, cast
6
+
7
+ import numpy as np
8
+
9
+ if TYPE_CHECKING:
10
+ from numpy.typing import ArrayLike
11
+
12
+ from dask.delayed import Delayed
13
+
14
+ from dask.base import named_schedulers
15
+ from dask.utils import SerializableLock
16
+
17
+ from dask_array._utils import is_arraylike
18
+ from dask_array.slicing._utils import fuse_slice
19
+
20
+
21
def get_scheduler_lock(collection, scheduler):
    """Pick a lock suited to the scheduler that will run the collection.

    Returns ``False`` (no locking) when the scheduler resolves to the
    ``"synchronous"`` entry of ``named_schedulers``; otherwise returns a new
    ``SerializableLock``.
    """
    chosen = collection.__dask_scheduler__ if scheduler is None else scheduler
    # Names are mapped to their scheduler; anything else is used as given.
    resolved_get = named_schedulers.get(chosen, chosen)
    synchronous_get = named_schedulers.get("synchronous", None)
    # The synchronous scheduler gets no lock.
    if resolved_get is synchronous_get:
        return False
    return SerializableLock()
30
+
31
+
32
def load_store_chunk(
    x: Any,
    out: Any,
    index: slice | None,
    region: slice | None,
    lock: Any,
    return_stored: bool,
    load_stored: bool,
) -> Any:
    """
    A function inserted in a Dask graph for storing a chunk.

    Parameters
    ----------
    x: array-like
        The chunk to write (possibly a NumPy array). ``None`` or an empty
        chunk skips the write.
    out: array-like
        Destination supporting setitem/getitem.
    index: slice-like
        Location of ``x`` inside ``out`` (relative to ``region`` if given).
    region: slice-like or None
        Sub-region of ``out`` being written to; fused with ``index`` when
        both are given, used alone when ``index`` is falsy.
    lock: Lock-like or False
        Acquired before touching ``out`` and released afterwards.
    return_stored: bool
        Whether to return ``out``.
    load_stored: bool
        Whether to return the array stored in ``out``.
        Ignored if ``return_stored`` is not ``True``.

    Returns
    -------

    If return_stored=True and load_stored=False
        out
    If return_stored=True and load_stored=True
        out[index]
    If return_stored=False
        None

    Examples
    --------

    >>> a = np.ones((5, 6))
    >>> b = np.empty(a.shape)
    >>> load_store_chunk(a, b, (slice(None), slice(None)), None, False, False, False)
    """
    if region:
        # Equivalent to `out[region][index]`
        index = fuse_slice(region, index) if index else region

    if lock:
        lock.acquire()
    try:
        has_data = x is not None and x.size != 0
        if has_data:
            out[index] = x if is_arraylike(x) else np.asanyarray(x)

        if not return_stored:
            return None
        return out[index] if load_stored else out
    finally:
        # Runs on every exit path so the lock is never left held.
        if lock:
            lock.release()
101
+
102
+
103
A = TypeVar("A", bound="ArrayLike")


def load_chunk(out: A, index: slice, lock: Any, region: slice | None) -> A:
    """Read one chunk back out of an array-like target.

    Delegates to ``load_store_chunk`` with no data to write, so the call
    only reads ``out`` at ``index`` (within ``region`` if given), guarded
    by ``lock``.
    """
    # Positional order: x, out, index, region, lock, return_stored, load_stored
    return load_store_chunk(None, out, index, region, lock, True, True)
120
+
121
+
122
def store(
    sources,
    targets,
    lock: bool | Lock = True,
    regions: tuple[slice, ...] | Collection[tuple[slice, ...]] | None = None,
    compute: bool = True,
    return_stored: bool = False,
    load_stored: bool | None = None,
    **kwargs,
):
    """Store dask arrays in array-like objects, overwrite data in target

    Writes dask arrays into objects that support numpy-style setitem
    indexing, one chunk at a time so that memory is not exhausted. For best
    performance align the block size of the storage target with the block
    size of your array.

    If your data fits in memory then you may prefer calling
    ``np.array(myarray)`` instead.

    Parameters
    ----------

    sources: Array or collection of Arrays
    targets: array-like or Delayed or collection of array-likes and/or Delayeds
        These should support setitem syntax ``target[10:20] = ...``.
        If sources is a single item, targets must be a single item; if sources is a
        collection of arrays, targets must be a matching collection.
    lock: boolean or threading.Lock, optional
        Whether or not to lock the data stores while storing.
        Pass True (a lock is chosen via ``get_scheduler_lock``), False
        (don't lock) or a particular :class:`threading.Lock` object to be
        shared among all writes.
    regions: tuple of slices or collection of tuples of slices, optional
        Each ``region`` tuple in ``regions`` should be such that
        ``target[region].shape = source.shape``
        for the corresponding source and target in sources and targets,
        respectively. If this is a tuple, the contents will be assumed to be
        slices, so do not provide a tuple of tuples.
    compute: boolean, optional
        If true compute immediately; otherwise return lazy results.
    return_stored: boolean, optional
        Optionally return the stored result (default False).
    load_stored: boolean, optional
        Optionally return the stored result, loaded in to memory (default None).
        If None, ``load_stored`` is True if ``return_stored`` is True and
        ``compute`` is False. *This is an advanced option.*
        When False, store will return the appropriate ``target`` for each chunk that is stored.
        Directly computing this result is not what you want.
        Instead, you can use the returned ``target`` to execute followup operations to the store.
    kwargs:
        Parameters passed to compute/persist (only used if compute=True)

    Returns
    -------

    If compute=True and return_stored=False
        None
    Otherwise
        The resulting Array when there is a single source, else a tuple of
        Arrays (one per source).

    Raises
    ------
    ValueError
        If a source is not a dask Array, or the numbers of sources, targets
        and regions disagree.

    Examples
    --------

    >>> import h5py  # doctest: +SKIP
    >>> f = h5py.File('myfile.hdf5', mode='a')  # doctest: +SKIP
    >>> dset = f.create_dataset('/data', shape=x.shape,
    ...                         chunks=x.chunks,
    ...                         dtype='f8')  # doctest: +SKIP

    >>> store(x, dset)  # doctest: +SKIP

    Alternatively store many arrays at the same time

    >>> store([x, y, z], [dset1, dset2, dset3])  # doctest: +SKIP
    """
    from dask.base import persist
    from dask.layers import ArraySliceDep

    from dask_array._collection import Array
    from dask_array._map_blocks import map_blocks

    # Normalize the single-array call form to the collection form.
    if isinstance(sources, Array):
        sources, targets = [sources], [targets]
    targets = cast("Collection[ArrayLike | Delayed]", targets)

    if not all(isinstance(src, Array) for src in sources):
        raise ValueError("All sources must be dask array objects")

    if len(sources) != len(targets):
        raise ValueError(f"Different number of sources [{len(sources)}] and targets [{len(targets)}]")

    # A bare tuple (or None) is one region shared by every source.
    if regions is None or isinstance(regions, tuple):
        regions_list = [regions] * len(sources)
    else:
        regions_list = list(regions)
        if len(sources) != len(regions_list):
            raise ValueError(
                f"Different number of sources [{len(sources)}] and "
                f"targets [{len(targets)}] than regions [{len(regions_list)}]"
            )

    if load_stored is None:
        load_stored = return_stored and not compute

    if lock is True:
        lock = get_scheduler_lock(Array, kwargs.get("scheduler"))

    # One blockwise "store" array per (source, target, region) triple.
    arrays = [
        map_blocks(
            load_store_chunk,
            src,
            tgt,
            ArraySliceDep(src.chunks),
            region=reg,
            lock=lock,
            return_stored=return_stored,
            load_stored=load_stored,
            name="store-map",
            meta=src._meta,
        )
        for src, tgt, reg in zip(sources, targets, regions_list)
    ]

    if compute and not return_stored:
        import dask

        dask.compute(arrays, **kwargs)
        return None

    if compute:
        # Run the writes now, then expose lazy reads of the stored data.
        persisted = persist(*arrays, **kwargs)
        arrays = [
            map_blocks(
                load_chunk,
                stored,
                ArraySliceDep(stored.chunks),
                lock=lock,
                region=reg,
                name="load-stored",
                meta=stored._meta,
            )
            for stored, reg in zip(persisted, regions_list)
        ]

    return arrays[0] if len(arrays) == 1 else tuple(arrays)
276
+
277
+
278
def to_hdf5(filename, *args, chunks=True, **kwargs):
    """Store arrays in HDF5 file

    Writes one or more dask arrays to datapaths inside an HDF5 file,
    creating (or reusing) the datasets and opening/closing the file cleanly.

    Parameters
    ----------
    filename:
        Path of the HDF5 file, opened in append mode.
    *args:
        Either a single ``{datapath: array}`` dict, or a datapath string
        followed by one dask Array.
    chunks: tuple or ``True``
        Chunk shape, or ``True`` to pass the chunks from the dask array.
        Defaults to ``True``.
    **kwargs:
        Forwarded to ``require_dataset`` for every dataset.

    Raises
    ------
    ValueError
        If ``args`` matches neither accepted form.

    Examples
    --------

    >>> da.to_hdf5('myfile.hdf5', '/x', x)  # doctest: +SKIP

    or

    >>> da.to_hdf5('myfile.hdf5', {'/x': x, '/y': y})  # doctest: +SKIP

    Optionally provide arguments as though to ``h5py.File.create_dataset``

    >>> da.to_hdf5('myfile.hdf5', '/x', x, compression='lzf', shuffle=True)  # doctest: +SKIP

    >>> da.to_hdf5('myfile.hdf5', '/x', x, chunks=(10,20,30))  # doctest: +SKIP

    This can also be used as a method on a single Array

    >>> x.to_hdf5('myfile.hdf5', '/x')  # doctest: +SKIP

    See Also
    --------
    da.store
    h5py.File.create_dataset
    """
    from dask_array._collection import Array

    is_mapping_form = len(args) == 1 and isinstance(args[0], dict)
    is_pair_form = len(args) == 2 and isinstance(args[0], str) and isinstance(args[1], Array)

    if is_mapping_form:
        data = args[0]
    elif is_pair_form:
        datapath, array = args
        data = {datapath: array}
    else:
        raise ValueError("Please provide {'/data/path': array} dictionary")

    import h5py

    with h5py.File(filename, mode="a") as f:
        dsets = []
        for dp, x in data.items():
            # True means "use the first chunk size along each axis".
            dset_chunks = tuple(c[0] for c in x.chunks) if chunks is True else chunks
            dsets.append(
                f.require_dataset(
                    dp,
                    shape=x.shape,
                    dtype=x.dtype,
                    chunks=dset_chunks,
                    **kwargs,
                )
            )
        # Must run while the file is still open.
        store(list(data.values()), dsets)
@@ -0,0 +1,159 @@
1
+ from __future__ import annotations
2
+
3
+ from dask_array.io._zarr import _check_regular_chunks
4
+ from dask_array.core._conversion import from_array
5
+
6
+
7
+ def _tiledb_to_chunks(tiledb_array):
8
+ schema = tiledb_array.schema
9
+ return list(schema.domain.dim(i).tile for i in range(schema.ndim))
10
+
11
+
12
def from_tiledb(uri, attribute=None, chunks=None, storage_options=None, **kwargs):
    """Load array from the TileDB storage format

    See https://docs.tiledb.io for more information about TileDB.

    Parameters
    ----------
    uri: TileDB array or str
        An already-open TileDB array, or the location to load the data from
    attribute: str or None
        Attribute selection (single-attribute view on multi-attribute array)
    chunks: sequence or None
        Chunk sizes, one entry per dimension. When falsy, the tile extents of
        the TileDB schema are used.
    storage_options: dict or None
        Configuration passed to ``tiledb.open``. A ``"key"`` entry, if
        present, is passed separately as the ``key`` argument. The dict
        passed in is not modified.
    **kwargs:
        Accepted but not used by this function.

    Returns
    -------

    A Dask Array

    Raises
    ------
    ValueError
        If the array is sparse or is open for writing.
    TypeError
        If the array has several attributes and ``attribute`` is not given.

    Examples
    --------

    >>> import tempfile, tiledb
    >>> import dask_array as da, numpy as np
    >>> uri = tempfile.NamedTemporaryFile().name
    >>> _ = tiledb.from_numpy(uri, np.arange(0,9).reshape(3,3))  # create a tiledb array
    >>> tdb_ar = da.from_tiledb(uri)  # read back the array
    >>> tdb_ar.shape
    (3, 3)
    >>> tdb_ar.mean().compute()
    4.0
    """
    import tiledb

    # Copy first: popping "key" must not mutate the caller's dict.
    tiledb_config = dict(storage_options) if storage_options else {}
    key = tiledb_config.pop("key", None)

    if isinstance(uri, tiledb.Array):
        tdb = uri
    else:
        tdb = tiledb.open(uri, attr=attribute, config=tiledb_config, key=key)

    if tdb.schema.sparse:
        raise ValueError("Sparse TileDB arrays are not supported")

    if not attribute:
        if tdb.schema.nattr > 1:
            raise TypeError("keyword 'attribute' must be provided when loading a multi-attribute TileDB array")
        attribute = tdb.schema.attr(0).name

    if tdb.iswritable:
        raise ValueError("TileDB array must be open for reading")

    chunks = chunks or _tiledb_to_chunks(tdb)

    assert len(chunks) == tdb.schema.ndim

    return from_array(tdb, chunks, name=f"tiledb-{uri}")
70
+
71
+
72
def to_tiledb(
    darray,
    uri,
    compute=True,
    return_stored=False,
    storage_options=None,
    key=None,
    **kwargs,
):
    """Save array to the TileDB storage format

    Save 'array' using the TileDB storage manager, to any TileDB-supported URI,
    including local disk, S3, or HDFS.

    See https://docs.tiledb.io for more information about TileDB.

    Parameters
    ----------

    darray: dask.array
        A dask array to write.
    uri:
        Any supported TileDB storage location, or an open, writable TileDB
        array.
    storage_options: dict
        Dict containing any configuration options for the TileDB backend.
        see https://docs.tiledb.io/en/stable/tutorials/config.html
        The dict passed in is not modified.
    compute, return_stored: see ``store()``
    key: str or None
        Encryption key. When not given, a ``"key"`` entry of
        ``storage_options`` is used instead.
    **kwargs:
        Forwarded to ``tiledb.empty_like`` when ``uri`` is a string.

    Returns
    -------

    None
        Unless ``return_stored`` is set to ``True`` (``False`` by default)

    Raises
    ------
    ValueError
        If the array is irregularly chunked, ``uri`` has an unsupported
        type, the target's dtype/ndim do not match, or the target is not
        open and writable.

    Notes
    -----

    TileDB only supports regularly-chunked arrays.
    TileDB `tile extents`_ correspond to form 2 of the dask
    `chunk specification`_, and the conversion is
    done automatically for supported arrays.

    Examples
    --------

    >>> import dask_array as da, tempfile
    >>> uri = tempfile.NamedTemporaryFile().name
    >>> data = da.random.random((5, 5))
    >>> da.to_tiledb(data, uri)
    >>> import tiledb
    >>> tdb_ar = tiledb.open(uri)
    >>> all(tdb_ar == data)
    True

    .. _chunk specification: http://docs.dask.org/en/latest/array-chunks.html
    .. _tile extents: https://docs.tiledb.io/en/stable/tutorials/tiling-dense.html
    """
    import tiledb

    # Copy first: popping "key" must not mutate the caller's dict.
    tiledb_config = dict(storage_options) if storage_options else {}
    # encryption key, if any
    key = key or tiledb_config.pop("key", None)

    if not _check_regular_chunks(darray.chunks):
        raise ValueError(
            "Attempt to save array to TileDB with irregular chunking, please call `arr.rechunk(...)` first."
        )

    if isinstance(uri, str):
        chunks = [c[0] for c in darray.chunks]
        # create a suitable, empty, writable TileDB array
        tdb = tiledb.empty_like(uri, darray, tile=chunks, config=tiledb_config, key=key, **kwargs)
    elif isinstance(uri, tiledb.Array):
        tdb = uri
        # sanity checks
        if not ((darray.dtype == tdb.dtype) and (darray.ndim == tdb.ndim)):
            raise ValueError("Target TileDB array layout is not compatible with source array")
    else:
        raise ValueError(
            "'uri' must be string pointing to supported TileDB store location or an open, writable TileDB array."
        )

    if not (tdb.isopen and tdb.iswritable):
        raise ValueError("Target TileDB array is not open and writable.")

    # No dask-level lock is requested for the TileDB write.
    return darray.store(tdb, lock=False, compute=compute, return_stored=return_stored)