dask-array 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dask_array/__init__.py +228 -0
- dask_array/_backends.py +76 -0
- dask_array/_backends_array.py +99 -0
- dask_array/_blockwise.py +1410 -0
- dask_array/_broadcast.py +272 -0
- dask_array/_chunk.py +445 -0
- dask_array/_chunk_types.py +54 -0
- dask_array/_collection.py +1644 -0
- dask_array/_concatenate.py +331 -0
- dask_array/_core_utils.py +1365 -0
- dask_array/_dispatch.py +141 -0
- dask_array/_einsum.py +277 -0
- dask_array/_expr.py +544 -0
- dask_array/_expr_flow.py +586 -0
- dask_array/_gufunc.py +805 -0
- dask_array/_histogram.py +617 -0
- dask_array/_map_blocks.py +652 -0
- dask_array/_new_collection.py +10 -0
- dask_array/_numpy_compat.py +135 -0
- dask_array/_overlap.py +1159 -0
- dask_array/_rechunk.py +1050 -0
- dask_array/_reshape.py +710 -0
- dask_array/_routines.py +102 -0
- dask_array/_shuffle.py +448 -0
- dask_array/_stack.py +264 -0
- dask_array/_svg.py +291 -0
- dask_array/_templates.py +29 -0
- dask_array/_test_utils.py +257 -0
- dask_array/_ufunc.py +385 -0
- dask_array/_utils.py +349 -0
- dask_array/_visualize.py +223 -0
- dask_array/_xarray.py +337 -0
- dask_array/core/__init__.py +34 -0
- dask_array/core/_blockwise_funcs.py +312 -0
- dask_array/core/_conversion.py +422 -0
- dask_array/core/_from_graph.py +97 -0
- dask_array/creation/__init__.py +71 -0
- dask_array/creation/_arange.py +121 -0
- dask_array/creation/_diag.py +116 -0
- dask_array/creation/_diagonal.py +241 -0
- dask_array/creation/_eye.py +103 -0
- dask_array/creation/_linspace.py +102 -0
- dask_array/creation/_mesh.py +134 -0
- dask_array/creation/_ones_zeros.py +454 -0
- dask_array/creation/_pad.py +270 -0
- dask_array/creation/_repeat.py +55 -0
- dask_array/creation/_tile.py +36 -0
- dask_array/creation/_tri.py +28 -0
- dask_array/creation/_utils.py +296 -0
- dask_array/fft.py +320 -0
- dask_array/io/__init__.py +39 -0
- dask_array/io/_base.py +10 -0
- dask_array/io/_from_array.py +257 -0
- dask_array/io/_from_delayed.py +95 -0
- dask_array/io/_from_graph.py +54 -0
- dask_array/io/_from_npy_stack.py +67 -0
- dask_array/io/_store.py +336 -0
- dask_array/io/_tiledb.py +159 -0
- dask_array/io/_to_npy_stack.py +65 -0
- dask_array/io/_zarr.py +449 -0
- dask_array/linalg/__init__.py +39 -0
- dask_array/linalg/_cholesky.py +234 -0
- dask_array/linalg/_lu.py +300 -0
- dask_array/linalg/_norm.py +94 -0
- dask_array/linalg/_qr.py +601 -0
- dask_array/linalg/_solve.py +349 -0
- dask_array/linalg/_svd.py +394 -0
- dask_array/linalg/_tensordot.py +334 -0
- dask_array/linalg/_utils.py +74 -0
- dask_array/manipulation/__init__.py +45 -0
- dask_array/manipulation/_expand.py +321 -0
- dask_array/manipulation/_flip.py +92 -0
- dask_array/manipulation/_roll.py +78 -0
- dask_array/manipulation/_transpose.py +309 -0
- dask_array/random/__init__.py +125 -0
- dask_array/random/_choice.py +181 -0
- dask_array/random/_expr.py +256 -0
- dask_array/random/_generator.py +441 -0
- dask_array/random/_random_state.py +259 -0
- dask_array/random/_utils.py +84 -0
- dask_array/reductions/__init__.py +84 -0
- dask_array/reductions/_arg_reduction.py +130 -0
- dask_array/reductions/_common.py +1082 -0
- dask_array/reductions/_cumulative.py +522 -0
- dask_array/reductions/_percentile.py +261 -0
- dask_array/reductions/_reduction.py +725 -0
- dask_array/reductions/_trace.py +56 -0
- dask_array/routines/__init__.py +133 -0
- dask_array/routines/_apply.py +84 -0
- dask_array/routines/_bincount.py +112 -0
- dask_array/routines/_broadcast.py +111 -0
- dask_array/routines/_coarsen.py +115 -0
- dask_array/routines/_diff.py +79 -0
- dask_array/routines/_gradient.py +158 -0
- dask_array/routines/_indexing.py +65 -0
- dask_array/routines/_insert_delete.py +132 -0
- dask_array/routines/_misc.py +122 -0
- dask_array/routines/_nonzero.py +72 -0
- dask_array/routines/_search.py +123 -0
- dask_array/routines/_select.py +113 -0
- dask_array/routines/_statistics.py +171 -0
- dask_array/routines/_topk.py +82 -0
- dask_array/routines/_triangular.py +74 -0
- dask_array/routines/_unique.py +232 -0
- dask_array/routines/_where.py +62 -0
- dask_array/slicing/__init__.py +67 -0
- dask_array/slicing/_basic.py +550 -0
- dask_array/slicing/_blocks.py +138 -0
- dask_array/slicing/_bool_index.py +145 -0
- dask_array/slicing/_setitem.py +329 -0
- dask_array/slicing/_squeeze.py +101 -0
- dask_array/slicing/_utils.py +1133 -0
- dask_array/slicing/_vindex.py +282 -0
- dask_array/stacking/__init__.py +15 -0
- dask_array/stacking/_block.py +83 -0
- dask_array/stacking/_simple.py +58 -0
- dask_array/templates/array.html.j2 +48 -0
- dask_array/tests/__init__.py +0 -0
- dask_array/tests/conftest.py +22 -0
- dask_array/tests/test_api.py +40 -0
- dask_array/tests/test_binary_op_chunks.py +107 -0
- dask_array/tests/test_coarse_slice_through_blockwise.py +362 -0
- dask_array/tests/test_collection.py +799 -0
- dask_array/tests/test_creation.py +1102 -0
- dask_array/tests/test_expr_flow.py +143 -0
- dask_array/tests/test_linalg.py +1130 -0
- dask_array/tests/test_map_blocks_multi_output.py +104 -0
- dask_array/tests/test_rechunk_pushdown.py +214 -0
- dask_array/tests/test_reductions.py +1091 -0
- dask_array/tests/test_routines.py +2853 -0
- dask_array/tests/test_shuffle_chunks.py +67 -0
- dask_array/tests/test_slice_pushdown.py +968 -0
- dask_array/tests/test_slice_through_blockwise.py +678 -0
- dask_array/tests/test_slice_through_overlap.py +366 -0
- dask_array/tests/test_slice_through_reshape.py +272 -0
- dask_array/tests/test_slicing.py +839 -0
- dask_array/tests/test_transpose_slice_pushdown.py +208 -0
- dask_array/tests/test_visualize.py +94 -0
- dask_array/tests/test_xarray.py +193 -0
- dask_array-0.1.0.dist-info/METADATA +48 -0
- dask_array-0.1.0.dist-info/RECORD +144 -0
- dask_array-0.1.0.dist-info/WHEEL +4 -0
- dask_array-0.1.0.dist-info/entry_points.txt +2 -0
- dask_array-0.1.0.dist-info/licenses/LICENSE +29 -0
dask_array/_reshape.py
ADDED
|
@@ -0,0 +1,710 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import functools
|
|
4
|
+
import math
|
|
5
|
+
from functools import reduce
|
|
6
|
+
from itertools import product
|
|
7
|
+
from operator import mul
|
|
8
|
+
|
|
9
|
+
import numpy as np
|
|
10
|
+
|
|
11
|
+
from dask._task_spec import Task, TaskRef
|
|
12
|
+
from dask_array._expr import ArrayExpr
|
|
13
|
+
from dask_array.slicing._utils import sanitize_index
|
|
14
|
+
from dask_array._utils import meta_from_array
|
|
15
|
+
from dask.utils import M
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# --------------------------------------------------------------------------
|
|
19
|
+
# reshape_rechunk and helper functions (copied from dask.array.reshape)
|
|
20
|
+
# --------------------------------------------------------------------------
|
|
21
|
+
|
|
22
|
+
_not_implemented_message = """
|
|
23
|
+
Dask's reshape only supports operations that merge or split existing dimensions
|
|
24
|
+
evenly. For example:
|
|
25
|
+
|
|
26
|
+
>>> x = da.ones((6, 5, 4), chunks=(3, 2, 2))
|
|
27
|
+
>>> x.reshape((3, 2, 5, 4)) # supported, splits 6 into 3 & 2
|
|
28
|
+
>>> x.reshape((30, 4)) # supported, merges 6 & 5 into 30
|
|
29
|
+
>>> x.reshape((4, 5, 6)) # unsupported, existing dimensions split unevenly
|
|
30
|
+
|
|
31
|
+
To work around this you may call reshape in multiple passes, or (if your data
|
|
32
|
+
is small enough) call ``compute`` first and handle reshaping in ``numpy``
|
|
33
|
+
directly.
|
|
34
|
+
"""
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def reshape_rechunk(inshape, outshape, inchunks, disallow_dimension_expansion=False):
    """Plan the chunking needed to reshape ``inshape`` into ``outshape``.

    Both shapes are walked from their last axis toward the first. At every
    step the current input axis and output axis are compared and one of the
    following cases is applied:

    * equal sizes: the input chunks are carried over unchanged;
    * input axis of length 1: it is consumed with chunks ``(1,)``;
    * output axis of length 1: it is emitted with chunks ``(1,)``;
    * input axis smaller than output axis: several input axes are merged
      into the one output axis;
    * input axis larger than output axis: the input axis is split into
      several output axes.

    Parameters
    ----------
    inshape : sequence of int
        Shape of the array before reshaping.
    outshape : sequence of int
        Shape of the array after reshaping.
    inchunks : sequence of tuple of int
        Current chunks of the input, one tuple per input axis. Every entry
        must be a tuple (checked with an ``assert``).
    disallow_dimension_expansion : bool, default False
        When true, the "split one input axis into several output axes" case
        raises ``NotImplementedError`` instead of being planned.

    Returns
    -------
    result_inchunks : tuple
        Chunks, per input axis, planned for the input side of the reshape.
    result_outchunks : tuple
        Chunks, per output axis, of the reshaped result.
    mapper_in : dict
        Maps an input axis index to the output axis index it ends up in.
        Only filled by the equal-size and merge cases.
    one_dimensions : list of int
        Output axes emitted by the "output axis of length 1" case.

    Raises
    ------
    NotImplementedError
        If a group of axes cannot be merged or split evenly, or if a split
        is required while ``disallow_dimension_expansion`` is true.
    """
    assert all(isinstance(c, tuple) for c in inchunks)
    # Cursors into the input/output shapes; both start at the last axis.
    ii = len(inshape) - 1
    oi = len(outshape) - 1
    result_inchunks = [None for i in range(len(inshape))]
    result_outchunks = [None for i in range(len(outshape))]
    mapper_in, one_dimensions = {}, []

    while ii >= 0 or oi >= 0:
        if inshape[ii] == outshape[oi]:
            # Axis is unchanged by the reshape: keep its chunks on both sides.
            result_inchunks[ii] = inchunks[ii]
            result_outchunks[oi] = inchunks[ii]
            mapper_in[ii] = oi
            ii -= 1
            oi -= 1
            continue
        din = inshape[ii]
        dout = outshape[oi]
        if din == 1:
            # Length-1 input axis: consume it without touching the output cursor.
            result_inchunks[ii] = (1,)
            ii -= 1
        elif dout == 1:
            # Length-1 output axis: emit it without touching the input cursor.
            result_outchunks[oi] = (1,)
            one_dimensions.append(oi)
            oi -= 1
        elif din < dout:  # (4, 4, 4) -> (64,)
            # Merge case: extend ileft leftwards until the product of
            # inshape[ileft : ii + 1] reaches dout.
            ileft = ii - 1
            mapper_in[ii] = oi
            while ileft >= 0 and reduce(mul, inshape[ileft : ii + 1]) < dout:  # 4 < 64, 4*4 < 64, 4*4*4 == 64
                mapper_in[ileft] = oi
                ileft -= 1

            mapper_in[ileft] = oi
            if reduce(mul, inshape[ileft : ii + 1]) != dout:
                raise NotImplementedError(_not_implemented_message)
            # Special case to avoid intermediate rechunking:
            # When all the lower axis are completely chunked (chunksize=1) then
            # we're simply moving around blocks.
            if all(len(inchunks[i]) == inshape[i] for i in range(ii)):
                for i in range(ii + 1):
                    result_inchunks[i] = inchunks[i]
                # Tuple repetition: the last axis' chunks repeated once per
                # block of the leading merged axes.
                result_outchunks[oi] = inchunks[ii] * math.prod(map(len, inchunks[ileft:ii]))
            else:
                for i in range(ileft + 1, ii + 1):  # need single-shape dimensions
                    result_inchunks[i] = (inshape[i],)  # chunks[i] = (4,)

                chunk_reduction = reduce(mul, map(len, inchunks[ileft + 1 : ii + 1]))
                result_inchunks[ileft] = expand_tuple(inchunks[ileft], chunk_reduction)

                max_in_chunk = _cal_max_chunk_size(inchunks, ileft, ii)
                result_inchunks = _smooth_chunks(ileft, ii, max_in_chunk, result_inchunks)
                # Build cross product of result_inchunks[ileft:ii+1]
                result_outchunks[oi] = _calc_lower_dimension_chunks(result_inchunks, ileft, ii)

            oi -= 1
            ii = ileft - 1
        elif din > dout:  # (64,) -> (4, 4, 4)
            if disallow_dimension_expansion:
                raise NotImplementedError(
                    "reshape_blockwise not implemented for expanding dimensions without passing chunk hints."
                )
            # Split case: extend oleft leftwards until the product of
            # outshape[oleft : oi + 1] reaches din.
            oleft = oi - 1
            while oleft >= 0 and reduce(mul, outshape[oleft : oi + 1]) < din:
                oleft -= 1
            if reduce(mul, outshape[oleft : oi + 1]) != din:
                raise NotImplementedError(_not_implemented_message)
            # TODO: don't coalesce shapes unnecessarily
            # cs is the number of elements spanned by the trailing output axes
            # of the group (everything to the right of oleft).
            cs = reduce(mul, outshape[oleft + 1 : oi + 1])

            result_inchunks[ii] = contract_tuple(inchunks[ii], cs)  # (16, 16, 16, 16)

            for i in range(oleft + 1, oi + 1):
                result_outchunks[i] = (outshape[i],)

            result_outchunks[oleft] = tuple(c // cs for c in result_inchunks[ii])

            max_in_chunk = _cal_max_chunk_size(inchunks, ii, ii)
            result_outchunks = _smooth_chunks(oleft, oi, max_in_chunk, result_outchunks)
            # Build cross product of result_outchunks[oleft:oi+1]
            result_inchunks[ii] = _calc_lower_dimension_chunks(result_outchunks, oleft, oi)
            oi = oleft - 1
            ii -= 1

    return tuple(result_inchunks), tuple(result_outchunks), mapper_in, one_dimensions
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def _calc_lower_dimension_chunks(chunks, start, stop):
|
|
124
|
+
# We need the lower dimension chunks to match what the higher dimension chunks
|
|
125
|
+
# can be combined to, i.e. multiply the different dimensions
|
|
126
|
+
return tuple(
|
|
127
|
+
map(
|
|
128
|
+
lambda x: reduce(mul, x),
|
|
129
|
+
product(*chunks[start : stop + 1]),
|
|
130
|
+
)
|
|
131
|
+
)
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _smooth_chunks(ileft, ii, max_in_chunk, result_inchunks):
    """Re-split chunks so the largest block stays close to ``max_in_chunk``.

    Parameters
    ----------
    ileft, ii : int
        First and last axis (inclusive) of the group of axes being adjusted.
    max_in_chunk : int
        Target for the product of the per-axis maximum chunk sizes over the
        group.
    result_inchunks : mutable sequence of tuple of int
        Per-axis chunks. One entry may be replaced in place by item
        assignment.

    Returns
    -------
    The ``result_inchunks`` object that was passed in, possibly with one axis
    (or more, through the recursive call) re-split.
    """
    # The previous step squashed the whole dimension into a single
    # chunk for ileft + 1 (and potentially combined too many elements
    # into a single chunk for ileft as well). We split up the single
    # chunk into multiple chunks to match the max_in_chunk to keep
    # chunksizes consistent:
    # ((1, 1), (200)) -> ((1, 1), (20, ) * 10) for max_in_chunk = 20
    # It's important to ensure that all dimensions before the dimension
    # we adjust have all-1 chunks to respect C contiguous arrays
    # during the reshaping

    # Remember the original left edge so the recursive call below restarts
    # from the same group of axes.
    ileft_orig = ileft
    max_result_in_chunk = _cal_max_chunk_size(result_inchunks, ileft, ii)
    if max_in_chunk == max_result_in_chunk:
        # reshaping doesn't mess up
        return result_inchunks

    while all(x == 1 for x in result_inchunks[ileft]):
        # Find the first dimension where we can split chunks
        ileft += 1

    if ileft < ii + 1:
        # How many pieces the over-sized chunk has to be cut into.
        factor = math.ceil(max_result_in_chunk / max_in_chunk)
        result_in_chunk = result_inchunks[ileft]

        if len(result_in_chunk) == 1:
            # This is a trivial case, when we arrive here is the chunk we are
            # splitting the same length as the whole dimension and all previous
            # chunks that are reshaped into the same dimension are all-one.
            # So we can split this dimension.
            elem = result_in_chunk[0]
            factor = min(factor, elem)
            ceil_elem = math.ceil(elem / factor)
            new_inchunk = [ceil_elem] * factor
            # Shave one element off the leading pieces so the pieces sum to elem.
            for i in range(ceil_elem * factor - elem):
                new_inchunk[i] -= 1
            result_inchunks[ileft] = tuple(new_inchunk)

            if all(x == 1 for x in new_inchunk) and ileft < ii:
                # might have to do another round
                return _smooth_chunks(ileft_orig, ii, max_in_chunk, result_inchunks)
        else:
            # We are now in the more complicated case. The first dimension in the set
            # of dimensions to squash has non-ones and our max chunk is bigger than
            # what we want. We need to split the non-ones into multiple chunks along
            # this axis.
            other_max_chunk = max_result_in_chunk // max(result_inchunks[ileft])
            result_in = []

            for elem_in in result_in_chunk:
                if elem_in * other_max_chunk <= max_in_chunk:
                    # This chunk already fits; keep it as is.
                    result_in.append(elem_in)
                    continue

                factor = math.ceil(elem_in * other_max_chunk / max_in_chunk)
                ceil_elem = math.ceil(elem_in / factor)
                new_in_chunk = [ceil_elem] * math.ceil(factor)
                # Shave one element off the leading pieces so they sum to elem_in.
                for i in range(ceil_elem * factor - elem_in):
                    new_in_chunk[i] -= 1
                result_in.extend(new_in_chunk)

            result_inchunks[ileft] = tuple(result_in)
    return result_inchunks
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def _cal_max_chunk_size(chunks, start, stop):
|
|
200
|
+
return int(
|
|
201
|
+
reduce(
|
|
202
|
+
mul,
|
|
203
|
+
[max(chunks[axis]) for axis in range(start, stop + 1)],
|
|
204
|
+
)
|
|
205
|
+
)
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def expand_tuple(chunks, factor):
    """Split every chunk into roughly ``factor`` smaller pieces.

    Each chunk is cut into pieces of ``int(chunk / factor)`` elements (at
    least 1); the last piece of a chunk absorbs the remainder. The total
    length is preserved. With ``factor == 1`` the input is returned as is.

    >>> expand_tuple((2, 4), 2)
    (1, 1, 2, 2)

    >>> expand_tuple((2, 4), 3)
    (1, 1, 1, 1, 2)

    >>> expand_tuple((3, 4), 2)
    (1, 2, 2, 2)

    >>> expand_tuple((7, 4), 3)
    (2, 2, 3, 1, 1, 2)
    """
    if factor == 1:
        return chunks

    pieces = []
    for size in chunks:
        remaining = size
        step = max(remaining / factor, 1)
        whole_step = int(step)
        # Peel off full-sized pieces while at least two steps are left.
        while remaining >= 2 * step:
            pieces.append(whole_step)
            remaining -= whole_step
        # Whatever is left (if anything) becomes the final piece.
        if remaining:
            pieces.append(remaining)
    assert sum(chunks) == sum(pieces)
    return tuple(pieces)
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def contract_tuple(chunks, factor):
    """Return simple chunks tuple such that factor divides all elements

    Chunks are accumulated from left to right; whenever the running total
    holds at least one multiple of ``factor`` that multiple is emitted and
    the remainder is carried into the next chunk.

    Examples
    --------
    >>> contract_tuple((2, 2, 8, 4), 4)
    (4, 8, 4)
    """
    assert sum(chunks) % factor == 0

    merged = []
    carry = 0
    for size in chunks:
        whole, carry = divmod(size + carry, factor)
        if whole:
            merged.append(factor * whole)
    return tuple(merged)
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
# --------------------------------------------------------------------------
|
|
261
|
+
# End of reshape_rechunk helpers
|
|
262
|
+
# --------------------------------------------------------------------------
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
class Reshape(ArrayExpr):
    """Reshape array to new shape.

    This is the high-level expression that gets lowered to ReshapeLowered.
    The lowering step computes the required rechunking.

    Operands
    --------
    array : ArrayExpr
        Expression being reshaped.
    _shape : tuple of int
        Target shape. NOTE(review): ``reshape`` in this module resolves any
        ``-1`` before building this expression; this class does not do so
        itself.
    """

    _parameters = ["array", "_shape"]

    def __new__(cls, *args, **kwargs):
        instance = super().__new__(cls, *args, **kwargs)
        # Touching ``chunks`` runs reshape_rechunk, so an unsupported reshape
        # raises NotImplementedError when the expression is created rather
        # than later during lowering.
        _ = instance.chunks
        return instance

    @functools.cached_property
    def _meta(self):
        return meta_from_array(self.array._meta, ndim=len(self._shape))

    @functools.cached_property
    def _reshape_chunks(self):
        """Compute input and output chunks for reshape."""
        inchunks, outchunks, _, _ = reshape_rechunk(self.array.shape, self._shape, self.array.chunks)
        return inchunks, outchunks

    @property
    def _inchunks(self):
        # Chunks the input must have before the per-block reshape.
        return self._reshape_chunks[0]

    @property
    def _outchunks(self):
        # Chunks of the reshaped result.
        return self._reshape_chunks[1]

    @functools.cached_property
    def chunks(self):
        return self._outchunks

    def _lower(self):
        """Lower to ReshapeLowered with the rechunked array as an operand."""
        if self._inchunks == self.array.chunks:
            rechunked = self.array
        else:
            rechunked = self.array.rechunk(self._inchunks)
        return ReshapeLowered(rechunked, self._shape, self._outchunks)

    def _simplify_up(self, parent, dependents):
        """Allow slice operations to push through Reshape."""
        from dask_array.slicing import SliceSlicesIntegers

        if isinstance(parent, SliceSlicesIntegers):
            return self._accept_slice(parent)
        return None

    def _accept_slice(self, slice_expr):
        """Accept a slice being pushed through Reshape.

        Reshape can be pushed through when the slice only affects dimensions
        that have the same size in both input and output shapes (preserved dims).

        For example:
            x.reshape((10, 2, 3))[:5]  # (10, 6) -> (10, 2, 3), first dim preserved
        becomes: x[:5].reshape((5, 2, 3))

        Parameters
        ----------
        slice_expr
            The slicing expression sitting on top of this reshape; only its
            ``index`` attribute (a sequence of slices, integers and ``None``)
            is read.

        Returns
        -------
        ArrayExpr or None
            The rewritten expression, or ``None`` when the slice cannot be
            pushed through (no leading preserved dimension, the slice touches
            a non-preserved dimension, or a slice has a step other than 1).
        """
        from numbers import Integral

        from dask_array._new_collection import new_collection

        in_shape = self.array.shape
        out_shape = self._shape
        index = slice_expr.index

        # Separate None (newaxis) from real indices.
        # None insertions don't interact with reshape and can be re-applied after.
        none_positions = [i for i, idx in enumerate(index) if idx is None]
        stripped_index = [idx for idx in index if idx is not None]

        # Pad stripped index to output ndim
        out_ndim = len(out_shape)
        full_index = stripped_index + [slice(None)] * (out_ndim - len(stripped_index))

        # Find how many leading dimensions are preserved (same size in both shapes)
        preserved_dims = 0
        for in_size, out_size in zip(in_shape, out_shape):
            if in_size != out_size:
                break
            preserved_dims += 1

        if preserved_dims == 0:
            return None  # No preserved dimensions, can't push through

        # Check if slice only affects preserved dimensions
        # (non-preserved dims must all be slice(None))
        if any(isinstance(idx, Integral) or idx != slice(None) for idx in full_index[preserved_dims:]):
            return None

        # Build the input slice (only on preserved dims, same indices)
        in_ndim = len(in_shape)
        input_index = list(full_index[:preserved_dims])
        input_index += [slice(None)] * (in_ndim - preserved_dims)

        # Compute new output shape after slicing
        new_out_shape = []
        for idx, size in zip(full_index, out_shape):
            if isinstance(idx, Integral):
                # Integer index removes dimension
                continue
            elif idx == slice(None):
                new_out_shape.append(size)
            else:
                # Normalize slice
                start, stop, step = idx.indices(size)
                if step != 1:
                    return None  # Don't handle non-unit steps
                # An empty slice can normalize to stop < start (e.g. [5:2]);
                # its length is 0, never negative.
                new_out_shape.append(max(stop - start, 0))

        new_out_shape = tuple(new_out_shape)

        # Apply slice to input, then reshape
        sliced_input = new_collection(self.array)[tuple(input_index)]
        result = Reshape(sliced_input.expr, new_out_shape)

        # Re-apply None insertions if any using expand_dims
        if none_positions:
            from dask_array.manipulation._expand import expand_dims

            # NOTE(review): assumes expand_dims follows numpy.expand_dims,
            # where each axis is a position in the *expanded* result. That
            # position is the None's position in the original index minus the
            # integer indices before it (each of those removes a dimension).
            # Earlier Nones must NOT be subtracted: they are themselves axes
            # of the expanded result.
            axes = []
            for pos in none_positions:
                ints_before = sum(1 for idx in index[:pos] if isinstance(idx, Integral))
                axes.append(pos - ints_before)

            return expand_dims(new_collection(result), axis=tuple(axes)).expr

        return result
|
|
411
|
+
|
|
412
|
+
|
|
413
|
+
class ReshapeLowered(ArrayExpr):
    """Lowered reshape expression with rechunked input as operand.

    Each input block is reshaped on its own into the matching output block;
    input and output blocks are paired in C (row-major) block order.
    """

    _parameters = ["array", "_shape", "_outchunks"]

    @functools.cached_property
    def _name(self):
        token = self.deterministic_token
        return f"reshape-{token}"

    @functools.cached_property
    def _meta(self):
        target_ndim = len(self._shape)
        return meta_from_array(self.array._meta, ndim=target_ndim)

    @functools.cached_property
    def chunks(self):
        return self._outchunks

    def _layer(self) -> dict:
        """Build one reshape task per output block."""
        source_chunks = self.array.chunks
        target_chunks = self._outchunks

        # Block keys and per-block shapes, all enumerated in the same order.
        source_keys = product([self.array._name], *(range(len(c)) for c in source_chunks))
        target_keys = product([self._name], *(range(len(c)) for c in target_chunks))
        block_shapes = product(*target_chunks)

        graph = {}
        for target_key, source_key, block_shape in zip(target_keys, source_keys, block_shapes):
            graph[target_key] = Task(target_key, M.reshape, TaskRef(source_key), block_shape)
        return graph
|
|
443
|
+
|
|
444
|
+
|
|
445
|
+
def reshape(x, shape, merge_chunks=True, limit=None):
    """Reshape array to new shape.

    Parameters
    ----------
    x : Array
        Input array
    shape : int or tuple of ints
        The new shape should be compatible with the original shape. If
        an integer (builtin or NumPy integer scalar), then the result will
        be a 1-D array of that length.
        One shape dimension can be -1. In this case, the value is
        inferred from the length of the array and remaining dimensions.
    merge_chunks : bool, default True
        Whether to merge chunks using the logic in :meth:`dask.array.rechunk`
        when communication is necessary given the input array chunking and
        the output shape. In this function, ``False`` only has an effect
        when the result has fewer dimensions than ``x``: the leading
        ``x.ndim - len(shape)`` axes are rechunked to size-1 chunks first.
    limit : int (optional)
        The maximum block size to target in bytes. Accepted for API
        compatibility; it is not used by this function.

    Returns
    -------
    reshaped : Array

    Raises
    ------
    ValueError
        If more than one dimension is -1, if ``x`` has unknown (NaN)
        dimensions, or if the total size would change.
    NotImplementedError
        Propagated from the chunk planning when the reshape neither merges
        nor splits existing dimensions evenly.
    """
    from numbers import Integral

    from dask_array._new_collection import new_collection

    # Normalize shape. ``Integral`` also covers NumPy integer scalars, which
    # are not instances of the builtin ``int``.
    if isinstance(shape, Integral):
        shape = (shape,)
    shape = tuple(map(sanitize_index, shape))

    # Handle -1 in shape
    known_sizes = [s for s in shape if s != -1]
    has_unknown = len(known_sizes) < len(shape)
    if has_unknown:
        if len(shape) - len(known_sizes) > 1:
            raise ValueError("can only specify one unknown dimension")
        # Fastpath for x.reshape(-1) on 1D arrays. Deliberately placed before
        # the unknown-shape check so it keeps working for 1D arrays whose
        # length is not known.
        if len(shape) == 1 and x.ndim == 1:
            return new_collection(x.expr)

    # Sanity check: run before inferring the -1 dimension so that arrays with
    # unknown sizes get this actionable message instead of a failure while
    # converting a NaN size to an integer.
    if np.isnan(sum(x.shape)):
        raise ValueError(
            f"Array chunk size or shape is unknown. shape: {x.shape}\n\nPossible solution with x.compute_chunk_sizes()"
        )

    if has_unknown:
        missing_size = sanitize_index(x.size / reduce(mul, known_sizes, 1))
        shape = tuple(missing_size if s == -1 else s for s in shape)

    if reduce(mul, shape, 1) != x.size:
        raise ValueError("total size of new array must be unchanged")

    # Identity reshape - return input unchanged
    if x.shape == shape:
        return x

    # Single partition case: use simple blockwise reshape
    expr = x.expr
    npartitions = reduce(mul, (len(c) for c in expr.chunks), 1)
    if npartitions == 1:
        return new_collection(ReshapeLowered(expr, shape, tuple((d,) for d in shape)))

    # Handle merge_chunks=False: pre-rechunk to size-1 chunks in early dimensions
    if not merge_chunks and x.ndim > len(shape):
        pre_rechunk = dict.fromkeys(range(x.ndim - len(shape)), 1)
        expr = expr.rechunk(pre_rechunk)

    return new_collection(Reshape(expr, shape))
|
|
510
|
+
|
|
511
|
+
|
|
512
|
+
class ReshapeBlockwise(ArrayExpr):
    """Blockwise reshape - each block reshaped independently.

    Unlike regular Reshape, this doesn't rechunk. Each block is independently
    reshaped and the results are concatenated. The output may have different
    element ordering than NumPy's reshape.

    When the target has more dimensions than the input, the output chunks are
    taken from the ``_chunks`` operand; otherwise they are derived from the
    input chunks.
    """

    _parameters = ["array", "_shape", "_chunks"]
    _defaults = {"_chunks": None}

    @functools.cached_property
    def _meta(self):
        target_ndim = len(self._shape)
        return meta_from_array(self.array._meta, ndim=target_ndim)

    @functools.cached_property
    def _reshape_info(self):
        """Return ``(mapper_in, one_dimensions)``, or None when expanding."""
        expanding = len(self._shape) > self.array.ndim
        if expanding:
            # Expansion case uses provided chunks directly
            return None

        _, _, axis_map, unit_axes = reshape_rechunk(
            self.array.shape,
            self._shape,
            self.array.chunks,
            disallow_dimension_expansion=True,
        )
        return axis_map, unit_axes

    @functools.cached_property
    def _out_shapes(self):
        """Shape of every output block, in C (row-major) block order."""
        if len(self._shape) > self.array.ndim:
            return list(product(*self._chunks))

        axis_map, unit_axes = self._reshape_info
        return [
            self._convert_to_shape(block_shape, axis_map, unit_axes)
            for block_shape in product(*self.array.chunks)
        ]

    @functools.cached_property
    def chunks(self):
        if len(self._shape) > self.array.ndim:
            if self._chunks is None:
                raise TypeError("Need to specify chunks if expanding dimensions.")
            return self._chunks

        axis_map, unit_axes = self._reshape_info
        blocks_per_axis = self._convert_to_shape(tuple(map(len, self.array.chunks)), axis_map, unit_axes)

        # Build output chunks from per-block shapes. Walking the axes from
        # last to first, ``stride`` is the distance in the flat block list
        # between consecutive blocks along the current axis.
        block_shapes = self._out_shapes
        out_ndim = len(blocks_per_axis)
        per_axis = [None] * out_ndim
        stride = 1
        for axis in range(out_ndim - 1, -1, -1):
            n_blocks = blocks_per_axis[axis]
            per_axis[axis] = tuple(block_shapes[block * stride][axis] for block in range(n_blocks))
            stride *= n_blocks

        return tuple(per_axis)

    @staticmethod
    def _convert_to_shape(shape, mapper_in, one_dims):
        """Map input dimensions to output dimensions."""
        n_out = len(set(mapper_in.values())) + len(one_dims)
        factors = [[] for _ in range(n_out)]
        for axis in one_dims:
            factors[axis] = [1]
        # Collect, per output axis, the input extents that are merged into it.
        for source_axis, target_axis in mapper_in.items():
            factors[target_axis].append(shape[source_axis])
        return tuple(reduce(mul, group) for group in factors)

    def _layer(self) -> dict:
        """Build one reshape task per block, pairing blocks in C order."""
        source_keys = product([self.array._name], *(range(len(c)) for c in self.array.chunks))
        target_keys = product([self._name], *(range(len(c)) for c in self.chunks))

        graph = {}
        for source_key, target_key, block_shape in zip(source_keys, target_keys, self._out_shapes):
            graph[target_key] = Task(target_key, M.reshape, TaskRef(source_key), block_shape)
        return graph
|
|
588
|
+
|
|
589
|
+
|
|
590
|
+
def reshape_blockwise(x, shape, chunks=None):
    """Blockwise-reshape into a new shape.

    The regular reshape operation in Dask preserves C-ordering in the array
    which requires a rechunking for most reshaping operations, making the
    computation relatively expensive.

    Blockwise-reshape reshapes every block into the new shape and concatenates
    the results. This is a trivial blockwise computation but will return the
    result in a different order than NumPy. This is a good solution for
    subsequent operations that don't rely on the order.

    Parameters
    ----------
    x : Array
        The input array to reshape.
    shape : int or tuple of ints
        The new shape should be compatible with the original shape. If
        an integer, then the result will be a 1-D array of that length.
        One shape dimension can be -1. In this case, the value is
        inferred from the length of the array and remaining dimensions.
        A list is accepted and treated like a tuple.
    chunks : tuple of tuples of ints, optional
        The chunk sizes for the output array. Required when expanding
        dimensions (increasing ndim). Must not be given otherwise.

    Returns
    -------
    reshaped : Array

    Raises
    ------
    TypeError
        If dimensions are expanded and ``chunks`` is not given.
    ValueError
        If ``x`` has unknown (NaN) dimensions, more than one dimension is
        -1, the total size would change, the given ``chunks`` do not match
        the input blocks (in size or in number), or ``chunks`` is given
        while the number of dimensions is not increased.

    Notes
    -----
    This is a parallelized version of ``np.reshape`` with the following
    limitations:

    1. It does not return elements in the same order as NumPy would
    2. It only allows for reshapings that collapse like ``(1, 2, 3, 4) -> (1, 6, 4)``

    Examples
    --------
    >>> import dask_array as da
    >>> x = da.from_array(np.arange(0, 27).reshape(3, 3, 3), chunks=(3, 2, (2, 1)))
    >>> result = reshape_blockwise(x, (3, 9))
    >>> result.chunks
    ((3,), (4, 2, 2, 1))
    """
    import math

    from dask_array._new_collection import new_collection
    from dask_array.core import asarray

    x = asarray(x)

    # Normalize ``shape`` to a tuple. A list used to be wrapped as a single
    # element, which then failed with a misleading size error.
    if isinstance(shape, list):
        shape = tuple(shape)
    elif not isinstance(shape, tuple):
        shape = (shape,)

    # Validate shape
    if np.isnan(sum(x.shape)):
        raise ValueError(
            f"Array chunk size or shape is unknown. shape: {x.shape}\n\nPossible solution with x.compute_chunk_sizes()"
        )

    # Infer a single -1 dimension from the remaining ones, as documented.
    n_unknown = sum(1 for s in shape if s == -1)
    if n_unknown > 1:
        raise ValueError("can only specify one unknown dimension")
    if n_unknown == 1:
        # The initial value 1 keeps this working for shape == (-1,).
        known = reduce(mul, (s for s in shape if s != -1), 1)
        # A non-positive product cannot yield a valid inferred size.
        if known <= 0 or x.size % known != 0:
            raise ValueError("total size of new array must be unchanged")
        missing_size = x.size // known
        shape = tuple(missing_size if s == -1 else s for s in shape)

    if reduce(mul, shape, 1) != x.size:
        raise ValueError("total size of new array must be unchanged")

    # Identity reshape
    if len(shape) == x.ndim and shape == x.shape:
        return x

    # Validate chunks for expansion
    if len(shape) > x.ndim:
        if chunks is None:
            raise TypeError("Need to specify chunks if expanding dimensions.")
        out_shapes = list(product(*chunks))
        in_shapes = list(product(*x.chunks))
        # Blocks are paired by position; each pair must hold the same number
        # of elements.
        non_matching_chunks = [
            (i, in_c, out_c)
            for i, (in_c, out_c) in enumerate(zip(in_shapes, out_shapes))
            if math.prod(in_c) != math.prod(out_c)
        ]
        if non_matching_chunks:
            raise ValueError(
                f"Chunk sizes do not match for the following chunks: "
                f"{[c[0] for c in non_matching_chunks[:5]]}. \n"
                f"The corresponding chunksizes are: {[c[1:] for c in non_matching_chunks[:5]]}. "
                f"(restricted to first 5 entries)."
            )
        # ``zip`` above stops at the shorter list, so a different number of
        # blocks would otherwise go unnoticed and produce an incomplete graph.
        if len(in_shapes) != len(out_shapes):
            raise ValueError(
                f"The number of output blocks ({len(out_shapes)}) must equal "
                f"the number of input blocks ({len(in_shapes)})."
            )
    elif chunks is not None:
        raise ValueError("Setting chunks is not allowed when reducing the number of dimensions.")

    return new_collection(ReshapeBlockwise(x.expr, shape, chunks))
|
|
682
|
+
|
|
683
|
+
|
|
684
|
+
def ravel(array_like):
    """Flatten the input into a 1-D array.

    Parameters
    ----------
    array_like : array_like
        Input array. Non-array inputs are converted to arrays.

    Returns
    -------
    raveled : Array
        A 1-D array containing the elements of the input.

    See Also
    --------
    numpy.ravel

    Examples
    --------
    >>> import dask_array as da
    >>> x = da.ones((2, 3), chunks=2)
    >>> da.ravel(x).compute()
    array([1., 1., 1., 1., 1., 1.])
    """
    from dask_array.core import asanyarray

    flat_shape = (-1,)
    as_array = asanyarray(array_like)
    return as_array.reshape(flat_shape)
|