dask-array 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144) hide show
  1. dask_array/__init__.py +228 -0
  2. dask_array/_backends.py +76 -0
  3. dask_array/_backends_array.py +99 -0
  4. dask_array/_blockwise.py +1410 -0
  5. dask_array/_broadcast.py +272 -0
  6. dask_array/_chunk.py +445 -0
  7. dask_array/_chunk_types.py +54 -0
  8. dask_array/_collection.py +1644 -0
  9. dask_array/_concatenate.py +331 -0
  10. dask_array/_core_utils.py +1365 -0
  11. dask_array/_dispatch.py +141 -0
  12. dask_array/_einsum.py +277 -0
  13. dask_array/_expr.py +544 -0
  14. dask_array/_expr_flow.py +586 -0
  15. dask_array/_gufunc.py +805 -0
  16. dask_array/_histogram.py +617 -0
  17. dask_array/_map_blocks.py +652 -0
  18. dask_array/_new_collection.py +10 -0
  19. dask_array/_numpy_compat.py +135 -0
  20. dask_array/_overlap.py +1159 -0
  21. dask_array/_rechunk.py +1050 -0
  22. dask_array/_reshape.py +710 -0
  23. dask_array/_routines.py +102 -0
  24. dask_array/_shuffle.py +448 -0
  25. dask_array/_stack.py +264 -0
  26. dask_array/_svg.py +291 -0
  27. dask_array/_templates.py +29 -0
  28. dask_array/_test_utils.py +257 -0
  29. dask_array/_ufunc.py +385 -0
  30. dask_array/_utils.py +349 -0
  31. dask_array/_visualize.py +223 -0
  32. dask_array/_xarray.py +337 -0
  33. dask_array/core/__init__.py +34 -0
  34. dask_array/core/_blockwise_funcs.py +312 -0
  35. dask_array/core/_conversion.py +422 -0
  36. dask_array/core/_from_graph.py +97 -0
  37. dask_array/creation/__init__.py +71 -0
  38. dask_array/creation/_arange.py +121 -0
  39. dask_array/creation/_diag.py +116 -0
  40. dask_array/creation/_diagonal.py +241 -0
  41. dask_array/creation/_eye.py +103 -0
  42. dask_array/creation/_linspace.py +102 -0
  43. dask_array/creation/_mesh.py +134 -0
  44. dask_array/creation/_ones_zeros.py +454 -0
  45. dask_array/creation/_pad.py +270 -0
  46. dask_array/creation/_repeat.py +55 -0
  47. dask_array/creation/_tile.py +36 -0
  48. dask_array/creation/_tri.py +28 -0
  49. dask_array/creation/_utils.py +296 -0
  50. dask_array/fft.py +320 -0
  51. dask_array/io/__init__.py +39 -0
  52. dask_array/io/_base.py +10 -0
  53. dask_array/io/_from_array.py +257 -0
  54. dask_array/io/_from_delayed.py +95 -0
  55. dask_array/io/_from_graph.py +54 -0
  56. dask_array/io/_from_npy_stack.py +67 -0
  57. dask_array/io/_store.py +336 -0
  58. dask_array/io/_tiledb.py +159 -0
  59. dask_array/io/_to_npy_stack.py +65 -0
  60. dask_array/io/_zarr.py +449 -0
  61. dask_array/linalg/__init__.py +39 -0
  62. dask_array/linalg/_cholesky.py +234 -0
  63. dask_array/linalg/_lu.py +300 -0
  64. dask_array/linalg/_norm.py +94 -0
  65. dask_array/linalg/_qr.py +601 -0
  66. dask_array/linalg/_solve.py +349 -0
  67. dask_array/linalg/_svd.py +394 -0
  68. dask_array/linalg/_tensordot.py +334 -0
  69. dask_array/linalg/_utils.py +74 -0
  70. dask_array/manipulation/__init__.py +45 -0
  71. dask_array/manipulation/_expand.py +321 -0
  72. dask_array/manipulation/_flip.py +92 -0
  73. dask_array/manipulation/_roll.py +78 -0
  74. dask_array/manipulation/_transpose.py +309 -0
  75. dask_array/random/__init__.py +125 -0
  76. dask_array/random/_choice.py +181 -0
  77. dask_array/random/_expr.py +256 -0
  78. dask_array/random/_generator.py +441 -0
  79. dask_array/random/_random_state.py +259 -0
  80. dask_array/random/_utils.py +84 -0
  81. dask_array/reductions/__init__.py +84 -0
  82. dask_array/reductions/_arg_reduction.py +130 -0
  83. dask_array/reductions/_common.py +1082 -0
  84. dask_array/reductions/_cumulative.py +522 -0
  85. dask_array/reductions/_percentile.py +261 -0
  86. dask_array/reductions/_reduction.py +725 -0
  87. dask_array/reductions/_trace.py +56 -0
  88. dask_array/routines/__init__.py +133 -0
  89. dask_array/routines/_apply.py +84 -0
  90. dask_array/routines/_bincount.py +112 -0
  91. dask_array/routines/_broadcast.py +111 -0
  92. dask_array/routines/_coarsen.py +115 -0
  93. dask_array/routines/_diff.py +79 -0
  94. dask_array/routines/_gradient.py +158 -0
  95. dask_array/routines/_indexing.py +65 -0
  96. dask_array/routines/_insert_delete.py +132 -0
  97. dask_array/routines/_misc.py +122 -0
  98. dask_array/routines/_nonzero.py +72 -0
  99. dask_array/routines/_search.py +123 -0
  100. dask_array/routines/_select.py +113 -0
  101. dask_array/routines/_statistics.py +171 -0
  102. dask_array/routines/_topk.py +82 -0
  103. dask_array/routines/_triangular.py +74 -0
  104. dask_array/routines/_unique.py +232 -0
  105. dask_array/routines/_where.py +62 -0
  106. dask_array/slicing/__init__.py +67 -0
  107. dask_array/slicing/_basic.py +550 -0
  108. dask_array/slicing/_blocks.py +138 -0
  109. dask_array/slicing/_bool_index.py +145 -0
  110. dask_array/slicing/_setitem.py +329 -0
  111. dask_array/slicing/_squeeze.py +101 -0
  112. dask_array/slicing/_utils.py +1133 -0
  113. dask_array/slicing/_vindex.py +282 -0
  114. dask_array/stacking/__init__.py +15 -0
  115. dask_array/stacking/_block.py +83 -0
  116. dask_array/stacking/_simple.py +58 -0
  117. dask_array/templates/array.html.j2 +48 -0
  118. dask_array/tests/__init__.py +0 -0
  119. dask_array/tests/conftest.py +22 -0
  120. dask_array/tests/test_api.py +40 -0
  121. dask_array/tests/test_binary_op_chunks.py +107 -0
  122. dask_array/tests/test_coarse_slice_through_blockwise.py +362 -0
  123. dask_array/tests/test_collection.py +799 -0
  124. dask_array/tests/test_creation.py +1102 -0
  125. dask_array/tests/test_expr_flow.py +143 -0
  126. dask_array/tests/test_linalg.py +1130 -0
  127. dask_array/tests/test_map_blocks_multi_output.py +104 -0
  128. dask_array/tests/test_rechunk_pushdown.py +214 -0
  129. dask_array/tests/test_reductions.py +1091 -0
  130. dask_array/tests/test_routines.py +2853 -0
  131. dask_array/tests/test_shuffle_chunks.py +67 -0
  132. dask_array/tests/test_slice_pushdown.py +968 -0
  133. dask_array/tests/test_slice_through_blockwise.py +678 -0
  134. dask_array/tests/test_slice_through_overlap.py +366 -0
  135. dask_array/tests/test_slice_through_reshape.py +272 -0
  136. dask_array/tests/test_slicing.py +839 -0
  137. dask_array/tests/test_transpose_slice_pushdown.py +208 -0
  138. dask_array/tests/test_visualize.py +94 -0
  139. dask_array/tests/test_xarray.py +193 -0
  140. dask_array-0.1.0.dist-info/METADATA +48 -0
  141. dask_array-0.1.0.dist-info/RECORD +144 -0
  142. dask_array-0.1.0.dist-info/WHEEL +4 -0
  143. dask_array-0.1.0.dist-info/entry_points.txt +2 -0
  144. dask_array-0.1.0.dist-info/licenses/LICENSE +29 -0
dask_array/_reshape.py ADDED
@@ -0,0 +1,710 @@
1
+ from __future__ import annotations
2
+
3
+ import functools
4
+ import math
5
+ from functools import reduce
6
+ from itertools import product
7
+ from operator import mul
8
+
9
+ import numpy as np
10
+
11
+ from dask._task_spec import Task, TaskRef
12
+ from dask_array._expr import ArrayExpr
13
+ from dask_array.slicing._utils import sanitize_index
14
+ from dask_array._utils import meta_from_array
15
+ from dask.utils import M
16
+
17
+
18
+ # --------------------------------------------------------------------------
19
+ # reshape_rechunk and helper functions (copied from dask.array.reshape)
20
+ # --------------------------------------------------------------------------
21
+
22
# Message attached to the NotImplementedError that reshape_rechunk raises when
# the product of the axes it tried to merge or split does not equal the size
# of the axis on the other side.
_not_implemented_message = """
Dask's reshape only supports operations that merge or split existing dimensions
evenly. For example:

>>> x = da.ones((6, 5, 4), chunks=(3, 2, 2))
>>> x.reshape((3, 2, 5, 4)) # supported, splits 6 into 3 & 2
>>> x.reshape((30, 4)) # supported, merges 6 & 5 into 30
>>> x.reshape((4, 5, 6)) # unsupported, existing dimensions split unevenly

To work around this you may call reshape in multiple passes, or (if your data
is small enough) call ``compute`` first and handle reshaping in ``numpy``
directly.
"""
35
+
36
+
37
def reshape_rechunk(inshape, outshape, inchunks, disallow_dimension_expansion=False):
    """Derive input and output chunks for reshaping ``inshape`` to ``outshape``.

    Both shapes are walked together from the last axis towards the first.
    At each step the current input axis and output axis are compared and one
    of five things happens: they match one-to-one, a length-1 input axis is
    consumed, a length-1 output axis is emitted, several input axes are
    merged into one output axis, or one input axis is split into several
    output axes.

    Parameters
    ----------
    inshape : sequence of int
        Shape of the array being reshaped.
    outshape : sequence of int
        Requested shape.
    inchunks : sequence of tuple of int
        Chunks of the input, one tuple per input axis (asserted).
    disallow_dimension_expansion : bool, default False
        If true, raise ``NotImplementedError`` instead of splitting one input
        axis into several output axes.

    Returns
    -------
    result_inchunks : tuple
        One entry per input axis: the chunks computed for that axis.
    result_outchunks : tuple
        One entry per output axis: the chunks computed for that axis.
    mapper_in : dict
        Input axis index -> output axis index, filled in for axes that match
        one-to-one or are merged. The split branch adds no entries.
    one_dimensions : list of int
        Output axis indices that have length 1 and were emitted without being
        paired with an input axis.

    Raises
    ------
    NotImplementedError
        If the axes gathered for a merge or split do not multiply to the size
        of the axis on the other side, or if a split is needed while
        ``disallow_dimension_expansion`` is true.
    """
    assert all(isinstance(c, tuple) for c in inchunks)
    # Cursors into inshape / outshape, both starting at the last axis.
    ii = len(inshape) - 1
    oi = len(outshape) - 1
    result_inchunks = [None for i in range(len(inshape))]
    result_outchunks = [None for i in range(len(outshape))]
    mapper_in, one_dimensions = {}, []

    # NOTE: once a cursor drops below zero it is still used as an index, so
    # it wraps around to the end of its shape (Python negative indexing).
    while ii >= 0 or oi >= 0:
        if inshape[ii] == outshape[oi]:
            # Same length on both sides: the axis carries over unchanged.
            result_inchunks[ii] = inchunks[ii]
            result_outchunks[oi] = inchunks[ii]
            mapper_in[ii] = oi
            ii -= 1
            oi -= 1
            continue
        din = inshape[ii]
        dout = outshape[oi]
        if din == 1:
            # Length-1 input axis with no counterpart: consume it.
            result_inchunks[ii] = (1,)
            ii -= 1
        elif dout == 1:
            # Length-1 output axis with no counterpart: emit it.
            result_outchunks[oi] = (1,)
            one_dimensions.append(oi)
            oi -= 1
        elif din < dout:  # (4, 4, 4) -> (64,)
            # Merge: extend leftwards over input axes until their product
            # reaches the output axis length.
            ileft = ii - 1
            mapper_in[ii] = oi
            while ileft >= 0 and reduce(mul, inshape[ileft : ii + 1]) < dout:  # 4 < 64, 4*4 < 64, 4*4*4 == 64
                mapper_in[ileft] = oi
                ileft -= 1

            mapper_in[ileft] = oi
            if reduce(mul, inshape[ileft : ii + 1]) != dout:
                raise NotImplementedError(_not_implemented_message)
            # Special case to avoid intermediate rechunking:
            # When all the lower axis are completely chunked (chunksize=1) then
            # we're simply moving around blocks.
            if all(len(inchunks[i]) == inshape[i] for i in range(ii)):
                for i in range(ii + 1):
                    result_inchunks[i] = inchunks[i]
                # Tuple repetition: the last axis' chunks are repeated once
                # per block of the other merged axes.
                result_outchunks[oi] = inchunks[ii] * math.prod(map(len, inchunks[ileft:ii]))
            else:
                for i in range(ileft + 1, ii + 1):  # need single-shape dimensions
                    result_inchunks[i] = (inshape[i],)  # chunks[i] = (4,)

                # Number of blocks the collapsed axes had; the leftmost axis
                # is split further by this factor.
                chunk_reduction = reduce(mul, map(len, inchunks[ileft + 1 : ii + 1]))
                result_inchunks[ileft] = expand_tuple(inchunks[ileft], chunk_reduction)

                max_in_chunk = _cal_max_chunk_size(inchunks, ileft, ii)
                result_inchunks = _smooth_chunks(ileft, ii, max_in_chunk, result_inchunks)
                # Build cross product of result_inchunks[ileft:ii+1]
                result_outchunks[oi] = _calc_lower_dimension_chunks(result_inchunks, ileft, ii)

            oi -= 1
            ii = ileft - 1
        elif din > dout:  # (64,) -> (4, 4, 4)
            if disallow_dimension_expansion:
                raise NotImplementedError(
                    "reshape_blockwise not implemented for expanding dimensions without passing chunk hints."
                )
            # Split: extend leftwards over output axes until their product
            # reaches the input axis length.
            oleft = oi - 1
            while oleft >= 0 and reduce(mul, outshape[oleft : oi + 1]) < din:
                oleft -= 1
            if reduce(mul, outshape[oleft : oi + 1]) != din:
                raise NotImplementedError(_not_implemented_message)
            # TODO: don't coalesce shapes unnecessarily
            # cs = number of elements spanned by one step along output axis oleft.
            cs = reduce(mul, outshape[oleft + 1 : oi + 1])

            result_inchunks[ii] = contract_tuple(inchunks[ii], cs)  # (16, 16, 16, 16)

            for i in range(oleft + 1, oi + 1):
                result_outchunks[i] = (outshape[i],)

            result_outchunks[oleft] = tuple(c // cs for c in result_inchunks[ii])

            max_in_chunk = _cal_max_chunk_size(inchunks, ii, ii)
            result_outchunks = _smooth_chunks(oleft, oi, max_in_chunk, result_outchunks)
            # Build cross product of result_outchunks[oleft:oi+1]
            result_inchunks[ii] = _calc_lower_dimension_chunks(result_outchunks, oleft, oi)
            oi = oleft - 1
            ii -= 1

    return tuple(result_inchunks), tuple(result_outchunks), mapper_in, one_dimensions
121
+
122
+
123
+ def _calc_lower_dimension_chunks(chunks, start, stop):
124
+ # We need the lower dimension chunks to match what the higher dimension chunks
125
+ # can be combined to, i.e. multiply the different dimensions
126
+ return tuple(
127
+ map(
128
+ lambda x: reduce(mul, x),
129
+ product(*chunks[start : stop + 1]),
130
+ )
131
+ )
132
+
133
+
134
def _smooth_chunks(ileft, ii, max_in_chunk, result_inchunks):
    """Split over-large chunks on one axis in ``ileft..ii`` to approach ``max_in_chunk``.

    Parameters
    ----------
    ileft, ii : int
        Inclusive range of axes in ``result_inchunks`` that belong together.
    max_in_chunk : int
        Target for the product of the per-axis maximum chunk sizes.
    result_inchunks : list
        Per-axis chunk tuples. The entry for the axis that gets split is
        replaced in place.

    Returns
    -------
    list
        The same ``result_inchunks`` object that was passed in.
    """
    # The previous step squashed the whole dimension into a single
    # chunk for ileft + 1 (and potentially combined too many elements
    # into a single chunk for ileft as well). We split up the single
    # chunk into multiple chunks to match the max_in_chunk to keep
    # chunksizes consistent:
    # ((1, 1), (200)) -> ((1, 1), (20, ) * 10) for max_in_chunk = 20
    # It's important to ensure that all dimensions before the dimension
    # we adjust have all-1 chunks to respect C contiguous arrays
    # during the reshaping

    # Remembered so the recursive call restarts from the original left edge.
    ileft_orig = ileft
    max_result_in_chunk = _cal_max_chunk_size(result_inchunks, ileft, ii)
    if max_in_chunk == max_result_in_chunk:
        # reshaping doesn't mess up
        return result_inchunks

    while all(x == 1 for x in result_inchunks[ileft]):
        # Find the first dimension where we can split chunks
        ileft += 1

    if ileft < ii + 1:
        # How many times larger the current biggest block is than the target.
        factor = math.ceil(max_result_in_chunk / max_in_chunk)
        result_in_chunk = result_inchunks[ileft]

        if len(result_in_chunk) == 1:
            # This is a trivial case, when we arrive here is the chunk we are
            # splitting the same length as the whole dimension and all previous
            # chunks that are reshaped into the same dimension are all-one.
            # So we can split this dimension.
            elem = result_in_chunk[0]
            # Cannot split into more pieces than there are elements.
            factor = min(factor, elem)
            ceil_elem = math.ceil(elem / factor)
            new_inchunk = [ceil_elem] * factor
            # Shave one element off the leading pieces so the pieces sum to elem.
            for i in range(ceil_elem * factor - elem):
                new_inchunk[i] -= 1
            result_inchunks[ileft] = tuple(new_inchunk)

            if all(x == 1 for x in new_inchunk) and ileft < ii:
                # might have to do another round
                return _smooth_chunks(ileft_orig, ii, max_in_chunk, result_inchunks)
        else:
            # We are now in the more complicated case. The first dimension in the set
            # of dimensions to squash has non-ones and our max chunk is bigger than
            # what we want. We need to split the non-ones into multiple chunks along
            # this axis.
            # Product of the maximum chunk sizes of the other axes in the range.
            other_max_chunk = max_result_in_chunk // max(result_inchunks[ileft])
            result_in = []

            for elem_in in result_in_chunk:
                if elem_in * other_max_chunk <= max_in_chunk:
                    # Already within the target; keep this chunk as it is.
                    result_in.append(elem_in)
                    continue

                factor = math.ceil(elem_in * other_max_chunk / max_in_chunk)
                ceil_elem = math.ceil(elem_in / factor)
                new_in_chunk = [ceil_elem] * math.ceil(factor)
                # Same balancing as above: leading pieces lose one element each.
                for i in range(ceil_elem * factor - elem_in):
                    new_in_chunk[i] -= 1
                result_in.extend(new_in_chunk)

            result_inchunks[ileft] = tuple(result_in)
    return result_inchunks
197
+
198
+
199
+ def _cal_max_chunk_size(chunks, start, stop):
200
+ return int(
201
+ reduce(
202
+ mul,
203
+ [max(chunks[axis]) for axis in range(start, stop + 1)],
204
+ )
205
+ )
206
+
207
+
208
def expand_tuple(chunks, factor):
    """Split every chunk into roughly ``factor`` smaller pieces.

    Each chunk is cut into pieces of ``int(chunk / factor)`` elements (at
    least 1); whatever is left once fewer than two pieces' worth remains is
    emitted as the final piece. With ``factor == 1`` the input is returned
    unchanged.

    >>> expand_tuple((2, 4), 2)
    (1, 1, 2, 2)

    >>> expand_tuple((2, 4), 3)
    (1, 1, 1, 1, 2)

    >>> expand_tuple((3, 4), 2)
    (1, 2, 2, 2)

    >>> expand_tuple((7, 4), 3)
    (2, 2, 3, 1, 1, 2)
    """
    if factor == 1:
        return chunks

    pieces = []
    for size in chunks:
        target = max(size / factor, 1)
        remaining = size
        while remaining >= 2 * target:
            piece = int(target)
            pieces.append(piece)
            remaining -= piece
        if remaining:
            pieces.append(remaining)

    # Splitting must never change the total length of the axis.
    assert sum(chunks) == sum(pieces)
    return tuple(pieces)
236
+
237
+
238
def contract_tuple(chunks, factor):
    """Merge chunks so that every resulting chunk is a multiple of ``factor``.

    Elements that do not fill a whole multiple are carried over into the
    next chunk. The total length must itself be divisible by ``factor``.

    Examples
    --------
    >>> contract_tuple((2, 2, 8, 4), 4)
    (4, 8, 4)
    """
    assert sum(chunks) % factor == 0

    merged = []
    carry = 0
    for size in chunks:
        whole, carry = divmod(size + carry, factor)
        aligned = factor * whole
        if aligned:
            merged.append(aligned)
    return tuple(merged)
258
+
259
+
260
+ # --------------------------------------------------------------------------
261
+ # End of reshape_rechunk helpers
262
+ # --------------------------------------------------------------------------
263
+
264
+
265
class Reshape(ArrayExpr):
    """Reshape array to new shape.

    This is the high-level expression that gets lowered to ReshapeLowered.
    The lowering step computes the required rechunking.

    Parameters (see ``_parameters``)
    --------------------------------
    array : the expression being reshaped
    _shape : tuple of ints, the target shape
    """

    _parameters = ["array", "_shape"]

    def __new__(cls, *args, **kwargs):
        # Call parent __new__ to create the instance
        instance = super().__new__(cls, *args, **kwargs)
        # Eagerly validate by computing chunks (which calls reshape_rechunk)
        # This ensures NotImplementedError is raised at creation time
        _ = instance.chunks
        return instance

    @functools.cached_property
    def _meta(self):
        return meta_from_array(self.array._meta, ndim=len(self._shape))

    @functools.cached_property
    def _reshape_chunks(self):
        """Compute input and output chunks for reshape."""
        inchunks, outchunks, _, _ = reshape_rechunk(self.array.shape, self._shape, self.array.chunks)
        return inchunks, outchunks

    @property
    def _inchunks(self):
        """Chunks the input must have before the blockwise reshape."""
        return self._reshape_chunks[0]

    @property
    def _outchunks(self):
        """Chunks of the reshaped result."""
        return self._reshape_chunks[1]

    @functools.cached_property
    def chunks(self):
        return self._outchunks

    def _lower(self):
        """Lower to ReshapeLowered with the rechunked array as an operand."""
        if self._inchunks == self.array.chunks:
            # Input is already chunked as required; skip the rechunk.
            rechunked = self.array
        else:
            rechunked = self.array.rechunk(self._inchunks)
        return ReshapeLowered(rechunked, self._shape, self._outchunks)

    def _simplify_up(self, parent, dependents):
        """Allow slice operations to push through Reshape."""
        from dask_array.slicing import SliceSlicesIntegers

        if isinstance(parent, SliceSlicesIntegers):
            return self._accept_slice(parent)
        return None

    def _accept_slice(self, slice_expr):
        """Accept a slice being pushed through Reshape.

        Reshape can be pushed through when the slice only affects dimensions
        that have the same size in both input and output shapes (preserved dims).

        For example:
            x.reshape((10, 2, 3))[:5]  # (10, 6) -> (10, 2, 3), first dim preserved
            becomes: x[:5].reshape((5, 2, 3))

        Returns
        -------
        A new expression equivalent to ``slice_expr`` applied to this reshape,
        or ``None`` when the slice cannot be pushed through (no preserved
        leading dimension, a non-trivial index on a non-preserved dimension,
        or a slice step other than 1).
        """
        from numbers import Integral

        from dask_array._new_collection import new_collection

        in_shape = self.array.shape
        out_shape = self._shape
        index = slice_expr.index

        # Separate None (newaxis) from real indices
        # None insertions don't interact with reshape and can be re-applied after
        none_positions = []  # positions where None appears in original index
        stripped_index = []  # index without Nones
        for i, idx in enumerate(index):
            if idx is None:
                none_positions.append(i)
            else:
                stripped_index.append(idx)

        # Pad stripped index to output ndim
        out_ndim = len(out_shape)
        full_index = list(stripped_index) + [slice(None)] * (out_ndim - len(stripped_index))

        # Find how many leading dimensions are preserved (same size in both shapes)
        preserved_dims = 0
        for in_size, out_size in zip(in_shape, out_shape):
            if in_size == out_size:
                preserved_dims += 1
            else:
                break

        if preserved_dims == 0:
            return None  # No preserved dimensions, can't push through

        # Check if slice only affects preserved dimensions
        # (non-preserved dims must all be slice(None))
        if any(isinstance(idx, Integral) or idx != slice(None) for idx in full_index[preserved_dims:]):
            return None

        # Build the input slice (only on preserved dims, same indices)
        in_ndim = len(in_shape)
        input_index = list(full_index[:preserved_dims])
        input_index += [slice(None)] * (in_ndim - preserved_dims)

        # Compute new output shape after slicing
        new_out_shape = []
        for idx, size in zip(full_index, out_shape):
            if isinstance(idx, Integral):
                # Integer index removes dimension
                continue
            elif idx == slice(None):
                new_out_shape.append(size)
            else:
                # Normalize slice
                start, stop, step = idx.indices(size)
                if step != 1:
                    return None  # Don't handle non-unit steps
                # An empty slice such as 5:2 normalizes to stop < start; its
                # length is 0, never negative.
                new_out_shape.append(max(stop - start, 0))

        new_out_shape = tuple(new_out_shape)

        # Apply slice to input, then reshape
        sliced_input = new_collection(self.array)[tuple(input_index)]
        result = Reshape(sliced_input.expr, new_out_shape)

        # Re-apply None insertions if any using expand_dims
        if none_positions:
            from dask_array.manipulation._expand import expand_dims

            # Compute where each None lands in the final (expanded) result.
            # NOTE(review): assumes expand_dims follows numpy.expand_dims,
            # where ``axis`` gives positions in the expanded output.
            axes = []
            for pos in none_positions:
                # Count how many real (non-None) indices come before this position
                real_before = sum(1 for idx in index[:pos] if idx is not None)
                # Integer indices drop their dimension, shifting later axes left
                ints_before = sum(1 for idx in stripped_index[:real_before] if isinstance(idx, Integral))
                # Earlier Nones stay in the result, so they are not subtracted.
                axes.append(pos - ints_before)

            return expand_dims(new_collection(result), axis=tuple(axes)).expr

        return result
411
+
412
+
413
class ReshapeLowered(ArrayExpr):
    """Lowered reshape expression with rechunked input as operand.

    Operands (see ``_parameters``): the input expression ``array``, the
    target ``_shape`` and the precomputed output chunks ``_outchunks``.
    """

    _parameters = ["array", "_shape", "_outchunks"]

    @functools.cached_property
    def _name(self):
        return f"reshape-{self.deterministic_token}"

    @functools.cached_property
    def _meta(self):
        target_ndim = len(self._shape)
        return meta_from_array(self.array._meta, ndim=target_ndim)

    @functools.cached_property
    def chunks(self):
        return self._outchunks

    def _layer(self) -> dict:
        """Build one reshape task per block.

        Input and output block keys are both enumerated in
        ``itertools.product`` order and paired positionally, together with
        the shape of the corresponding output block.
        """
        target_chunks = self._outchunks

        source_keys = product(
            [self.array._name], *(range(len(axis)) for axis in self.array.chunks)
        )
        target_keys = product(
            [self._name], *(range(len(axis)) for axis in target_chunks)
        )
        block_shapes = product(*target_chunks)

        graph = {}
        for key, source, block_shape in zip(target_keys, source_keys, block_shapes):
            graph[key] = Task(key, M.reshape, TaskRef(source), block_shape)
        return graph
443
+
444
+
445
def reshape(x, shape, merge_chunks=True, limit=None):
    """Reshape array to new shape.

    Parameters
    ----------
    x : Array
        Input array
    shape : int or tuple of ints
        The new shape should be compatible with the original shape. If
        an integer (Python or NumPy), then the result will be a 1-D array
        of that length. One shape dimension can be -1. In this case, the
        value is inferred from the length of the array and remaining
        dimensions.
    merge_chunks : bool, default True
        When false and the result has fewer dimensions than ``x``, the
        leading ``x.ndim - len(shape)`` axes of ``x`` are rechunked to
        chunk size 1 before reshaping.
    limit : int (optional)
        The maximum block size to target in bytes. Accepted for signature
        compatibility; this implementation does not use it.

    Returns
    -------
    reshaped : Array

    Raises
    ------
    ValueError
        If more than one dimension is -1, if the unknown dimension cannot be
        inferred because the known dimensions multiply to zero, if the shape
        of ``x`` is unknown, or if the total size would change.
    """
    from numbers import Integral

    from dask_array._new_collection import new_collection

    # Normalize shape. Integral also covers NumPy integer scalars, which are
    # not instances of ``int``.
    if isinstance(shape, Integral):
        shape = (shape,)
    shape = tuple(map(sanitize_index, shape))

    # Handle -1 in shape
    known_sizes = [s for s in shape if s != -1]
    if len(known_sizes) < len(shape):
        if len(shape) - len(known_sizes) > 1:
            raise ValueError("can only specify one unknown dimension")
        # Fastpath for x.reshape(-1) on 1D arrays
        if len(shape) == 1 and x.ndim == 1:
            return new_collection(x.expr)
        known_total = reduce(mul, known_sizes, 1)
        if known_total == 0:
            # Dividing by zero below would surface as ZeroDivisionError;
            # report the unusable shape as a ValueError instead.
            raise ValueError(
                f"cannot infer the unknown dimension of shape {shape}: "
                "the known dimensions multiply to zero"
            )
        missing_size = sanitize_index(x.size / known_total)
        shape = tuple(missing_size if s == -1 else s for s in shape)

    # Sanity checks
    if np.isnan(sum(x.shape)):
        raise ValueError(
            f"Array chunk size or shape is unknown. shape: {x.shape}\n\nPossible solution with x.compute_chunk_sizes()"
        )
    if reduce(mul, shape, 1) != x.size:
        raise ValueError("total size of new array must be unchanged")

    # Identity reshape - return input unchanged
    if x.shape == shape:
        return x

    # Single partition case: use simple blockwise reshape
    expr = x.expr
    npartitions = reduce(mul, (len(c) for c in expr.chunks), 1)
    if npartitions == 1:
        # One block in, one block out: every output axis is a single chunk.
        return new_collection(ReshapeLowered(expr, shape, tuple((d,) for d in shape)))

    # Handle merge_chunks=False: pre-rechunk to size-1 chunks in early dimensions
    if not merge_chunks and x.ndim > len(shape):
        pre_rechunk = dict.fromkeys(range(x.ndim - len(shape)), 1)
        expr = expr.rechunk(pre_rechunk)

    return new_collection(Reshape(expr, shape))
510
+
511
+
512
class ReshapeBlockwise(ArrayExpr):
    """Blockwise reshape - each block reshaped independently.

    Unlike regular Reshape, this doesn't rechunk. Each block is independently
    reshaped and the results are concatenated. The output may have different
    element ordering than NumPy's reshape.

    Operands (see ``_parameters``): the input expression ``array``, the
    target ``_shape`` and, for reshapes that add dimensions, the output
    ``_chunks`` (default ``None``).
    """

    _parameters = ["array", "_shape", "_chunks"]
    _defaults = {"_chunks": None}

    @functools.cached_property
    def _meta(self):
        target_ndim = len(self._shape)
        return meta_from_array(self.array._meta, ndim=target_ndim)

    @functools.cached_property
    def _reshape_info(self):
        """Axis mapping from reshape_rechunk, or None when adding dimensions."""
        if self.array.ndim < len(self._shape):
            # Expansion case uses provided chunks directly
            return None

        rechunk_plan = reshape_rechunk(
            self.array.shape,
            self._shape,
            self.array.chunks,
            disallow_dimension_expansion=True,
        )
        _, _, mapper_in, one_dimensions = rechunk_plan
        return mapper_in, one_dimensions

    @functools.cached_property
    def _out_shapes(self):
        """Per-block output shapes."""
        if self.array.ndim < len(self._shape):
            return list(product(*self._chunks))

        mapper_in, one_dims = self._reshape_info
        block_shapes = []
        for in_block_shape in product(*self.array.chunks):
            block_shapes.append(self._convert_to_shape(in_block_shape, mapper_in, one_dims))
        return block_shapes

    @functools.cached_property
    def chunks(self):
        if self.array.ndim < len(self._shape):
            if self._chunks is None:
                raise TypeError("Need to specify chunks if expanding dimensions.")
            return self._chunks

        mapper_in, one_dims = self._reshape_info
        blocks_per_in_axis = tuple(len(axis_chunks) for axis_chunks in self.array.chunks)
        nr_out_chunks = self._convert_to_shape(blocks_per_in_axis, mapper_in, one_dims)

        # Build output chunks from per-block shapes. Blocks are listed with
        # the last axis varying fastest, so stepping by ``stride`` through
        # the flat list moves along one output axis at a time.
        out_ndim = len(nr_out_chunks)
        per_axis = [None] * out_ndim
        stride = 1
        for axis in range(out_ndim - 1, -1, -1):
            n_blocks = nr_out_chunks[axis]
            per_axis[axis] = tuple(self._out_shapes[k * stride][axis] for k in range(n_blocks))
            stride *= n_blocks

        return tuple(per_axis)

    @staticmethod
    def _convert_to_shape(shape, mapper_in, one_dims):
        """Map input dimensions to output dimensions."""
        n_out = len(set(mapper_in.values())) + len(one_dims)
        factors = [[] for _ in range(n_out)]
        for out_axis in one_dims:
            factors[out_axis] = [1]
        for in_axis, out_axis in mapper_in.items():
            factors[out_axis].append(shape[in_axis])
        return tuple(reduce(mul, group) for group in factors)

    def _layer(self) -> dict:
        """Build one reshape task per input block, paired positionally with output keys."""
        source_keys = product(
            [self.array._name], *(range(len(axis)) for axis in self.array.chunks)
        )
        target_keys = product(
            [self._name], *(range(len(axis)) for axis in self.chunks)
        )

        graph = {}
        for source, key, block_shape in zip(source_keys, target_keys, self._out_shapes):
            graph[key] = Task(key, M.reshape, TaskRef(source), block_shape)
        return graph
588
+
589
+
590
def reshape_blockwise(x, shape, chunks=None):
    """Blockwise-reshape into a new shape.

    The regular reshape operation in Dask preserves C-ordering in the array
    which requires a rechunking for most reshaping operations, making the
    computation relatively expensive.

    Blockwise-reshape reshapes every block into the new shape and concatenates
    the results. This is a trivial blockwise computation but will return the
    result in a different order than NumPy. This is a good solution for
    subsequent operations that don't rely on the order.

    Parameters
    ----------
    x : Array
        The input array to reshape.
    shape : int or tuple or list of ints
        The new shape should be compatible with the original shape. If
        an integer, then the result will be a 1-D array of that length.
        One shape dimension can be -1. In this case, the value is
        inferred from the length of the array and remaining dimensions.
    chunks : tuple of tuples of ints, optional
        The chunk sizes for the output array. Required when expanding
        dimensions (increasing ndim). Must not be given otherwise.

    Returns
    -------
    reshaped : Array

    Raises
    ------
    TypeError
        If the reshape adds dimensions and ``chunks`` is not given.
    ValueError
        If the shape of ``x`` is unknown, more than one dimension is -1, the
        total size would change, the given ``chunks`` do not match the input
        blocks in size, or ``chunks`` is given without adding dimensions.

    Notes
    -----
    This is a parallelized version of ``np.reshape`` with the following
    limitations:

    1. It does not return elements in the same order as NumPy would
    2. Without ``chunks`` it only allows for reshapings that collapse like
       ``(1, 2, 3, 4) -> (1, 6, 4)``

    Examples
    --------
    >>> import numpy as np
    >>> import dask_array as da
    >>> x = da.from_array(np.arange(0, 27).reshape(3, 3, 3), chunks=(3, 2, (2, 1)))
    >>> result = reshape_blockwise(x, (3, 9))
    >>> result.chunks
    ((3,), (4, 2, 2, 1))
    """
    import math

    from dask_array._new_collection import new_collection
    from dask_array.core import asarray

    x = asarray(x)

    # Normalize shape to a tuple; a list is treated as a sequence of
    # dimensions rather than being wrapped as a single dimension.
    if isinstance(shape, list):
        shape = tuple(shape)
    elif not isinstance(shape, tuple):
        shape = (shape,)

    # Validate shape
    if np.isnan(sum(x.shape)):
        raise ValueError(
            f"Array chunk size or shape is unknown. shape: {x.shape}\n\nPossible solution with x.compute_chunk_sizes()"
        )

    # Infer a single -1 dimension from the total size, wherever it appears.
    unknown_axes = [axis for axis, s in enumerate(shape) if s == -1]
    if len(unknown_axes) > 1:
        raise ValueError("can only specify one unknown dimension")
    if unknown_axes:
        known_total = reduce(mul, (s for s in shape if s != -1), 1)
        if known_total <= 0 or x.size % known_total != 0:
            raise ValueError("total size of new array must be unchanged")
        missing_size = int(x.size // known_total)
        shape = tuple(missing_size if s == -1 else s for s in shape)

    if reduce(mul, shape, 1) != x.size:
        raise ValueError("total size of new array must be unchanged")

    # Identity reshape
    if len(shape) == x.ndim and shape == x.shape:
        return x

    # Validate chunks for expansion
    if len(shape) > x.ndim:
        if chunks is None:
            raise TypeError("Need to specify chunks if expanding dimensions.")
        # Blocks are paired positionally; each output block must hold exactly
        # as many elements as the input block it is reshaped from.
        out_shapes = list(product(*(c for c in chunks)))
        in_shapes = list(product(*(c for c in x.chunks)))
        non_matching_chunks = [
            (i, in_c, out_c)
            for i, (in_c, out_c) in enumerate(zip(in_shapes, out_shapes))
            if math.prod(in_c) != math.prod(out_c)
        ]
        if non_matching_chunks:
            raise ValueError(
                f"Chunk sizes do not match for the following chunks: "
                f"{[c[0] for c in non_matching_chunks[:5]]}. \n"
                f"The corresponding chunksizes are: {[c[1:] for c in non_matching_chunks[:5]]}. "
                f"(restricted to first 5 entries)."
            )
    elif chunks is not None:
        raise ValueError("Setting chunks is not allowed when reducing the number of dimensions.")

    return new_collection(ReshapeBlockwise(x.expr, shape, chunks))
682
+
683
+
684
def ravel(array_like):
    """Flatten the input into a one-dimensional array.

    Parameters
    ----------
    array_like : array_like
        Input array. It is passed through ``asanyarray`` before reshaping.

    Returns
    -------
    raveled : Array
        The result of reshaping the input to ``(-1,)``.

    See Also
    --------
    numpy.ravel

    Examples
    --------
    >>> import dask_array as da
    >>> x = da.ones((2, 3), chunks=2)
    >>> da.ravel(x).compute()
    array([1., 1., 1., 1., 1., 1.])
    """
    from dask_array.core import asanyarray

    arr = asanyarray(array_like)
    flat_shape = (-1,)
    return arr.reshape(flat_shape)