dask-array 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dask_array/__init__.py +228 -0
- dask_array/_backends.py +76 -0
- dask_array/_backends_array.py +99 -0
- dask_array/_blockwise.py +1410 -0
- dask_array/_broadcast.py +272 -0
- dask_array/_chunk.py +445 -0
- dask_array/_chunk_types.py +54 -0
- dask_array/_collection.py +1644 -0
- dask_array/_concatenate.py +331 -0
- dask_array/_core_utils.py +1365 -0
- dask_array/_dispatch.py +141 -0
- dask_array/_einsum.py +277 -0
- dask_array/_expr.py +544 -0
- dask_array/_expr_flow.py +586 -0
- dask_array/_gufunc.py +805 -0
- dask_array/_histogram.py +617 -0
- dask_array/_map_blocks.py +652 -0
- dask_array/_new_collection.py +10 -0
- dask_array/_numpy_compat.py +135 -0
- dask_array/_overlap.py +1159 -0
- dask_array/_rechunk.py +1050 -0
- dask_array/_reshape.py +710 -0
- dask_array/_routines.py +102 -0
- dask_array/_shuffle.py +448 -0
- dask_array/_stack.py +264 -0
- dask_array/_svg.py +291 -0
- dask_array/_templates.py +29 -0
- dask_array/_test_utils.py +257 -0
- dask_array/_ufunc.py +385 -0
- dask_array/_utils.py +349 -0
- dask_array/_visualize.py +223 -0
- dask_array/_xarray.py +337 -0
- dask_array/core/__init__.py +34 -0
- dask_array/core/_blockwise_funcs.py +312 -0
- dask_array/core/_conversion.py +422 -0
- dask_array/core/_from_graph.py +97 -0
- dask_array/creation/__init__.py +71 -0
- dask_array/creation/_arange.py +121 -0
- dask_array/creation/_diag.py +116 -0
- dask_array/creation/_diagonal.py +241 -0
- dask_array/creation/_eye.py +103 -0
- dask_array/creation/_linspace.py +102 -0
- dask_array/creation/_mesh.py +134 -0
- dask_array/creation/_ones_zeros.py +454 -0
- dask_array/creation/_pad.py +270 -0
- dask_array/creation/_repeat.py +55 -0
- dask_array/creation/_tile.py +36 -0
- dask_array/creation/_tri.py +28 -0
- dask_array/creation/_utils.py +296 -0
- dask_array/fft.py +320 -0
- dask_array/io/__init__.py +39 -0
- dask_array/io/_base.py +10 -0
- dask_array/io/_from_array.py +257 -0
- dask_array/io/_from_delayed.py +95 -0
- dask_array/io/_from_graph.py +54 -0
- dask_array/io/_from_npy_stack.py +67 -0
- dask_array/io/_store.py +336 -0
- dask_array/io/_tiledb.py +159 -0
- dask_array/io/_to_npy_stack.py +65 -0
- dask_array/io/_zarr.py +449 -0
- dask_array/linalg/__init__.py +39 -0
- dask_array/linalg/_cholesky.py +234 -0
- dask_array/linalg/_lu.py +300 -0
- dask_array/linalg/_norm.py +94 -0
- dask_array/linalg/_qr.py +601 -0
- dask_array/linalg/_solve.py +349 -0
- dask_array/linalg/_svd.py +394 -0
- dask_array/linalg/_tensordot.py +334 -0
- dask_array/linalg/_utils.py +74 -0
- dask_array/manipulation/__init__.py +45 -0
- dask_array/manipulation/_expand.py +321 -0
- dask_array/manipulation/_flip.py +92 -0
- dask_array/manipulation/_roll.py +78 -0
- dask_array/manipulation/_transpose.py +309 -0
- dask_array/random/__init__.py +125 -0
- dask_array/random/_choice.py +181 -0
- dask_array/random/_expr.py +256 -0
- dask_array/random/_generator.py +441 -0
- dask_array/random/_random_state.py +259 -0
- dask_array/random/_utils.py +84 -0
- dask_array/reductions/__init__.py +84 -0
- dask_array/reductions/_arg_reduction.py +130 -0
- dask_array/reductions/_common.py +1082 -0
- dask_array/reductions/_cumulative.py +522 -0
- dask_array/reductions/_percentile.py +261 -0
- dask_array/reductions/_reduction.py +725 -0
- dask_array/reductions/_trace.py +56 -0
- dask_array/routines/__init__.py +133 -0
- dask_array/routines/_apply.py +84 -0
- dask_array/routines/_bincount.py +112 -0
- dask_array/routines/_broadcast.py +111 -0
- dask_array/routines/_coarsen.py +115 -0
- dask_array/routines/_diff.py +79 -0
- dask_array/routines/_gradient.py +158 -0
- dask_array/routines/_indexing.py +65 -0
- dask_array/routines/_insert_delete.py +132 -0
- dask_array/routines/_misc.py +122 -0
- dask_array/routines/_nonzero.py +72 -0
- dask_array/routines/_search.py +123 -0
- dask_array/routines/_select.py +113 -0
- dask_array/routines/_statistics.py +171 -0
- dask_array/routines/_topk.py +82 -0
- dask_array/routines/_triangular.py +74 -0
- dask_array/routines/_unique.py +232 -0
- dask_array/routines/_where.py +62 -0
- dask_array/slicing/__init__.py +67 -0
- dask_array/slicing/_basic.py +550 -0
- dask_array/slicing/_blocks.py +138 -0
- dask_array/slicing/_bool_index.py +145 -0
- dask_array/slicing/_setitem.py +329 -0
- dask_array/slicing/_squeeze.py +101 -0
- dask_array/slicing/_utils.py +1133 -0
- dask_array/slicing/_vindex.py +282 -0
- dask_array/stacking/__init__.py +15 -0
- dask_array/stacking/_block.py +83 -0
- dask_array/stacking/_simple.py +58 -0
- dask_array/templates/array.html.j2 +48 -0
- dask_array/tests/__init__.py +0 -0
- dask_array/tests/conftest.py +22 -0
- dask_array/tests/test_api.py +40 -0
- dask_array/tests/test_binary_op_chunks.py +107 -0
- dask_array/tests/test_coarse_slice_through_blockwise.py +362 -0
- dask_array/tests/test_collection.py +799 -0
- dask_array/tests/test_creation.py +1102 -0
- dask_array/tests/test_expr_flow.py +143 -0
- dask_array/tests/test_linalg.py +1130 -0
- dask_array/tests/test_map_blocks_multi_output.py +104 -0
- dask_array/tests/test_rechunk_pushdown.py +214 -0
- dask_array/tests/test_reductions.py +1091 -0
- dask_array/tests/test_routines.py +2853 -0
- dask_array/tests/test_shuffle_chunks.py +67 -0
- dask_array/tests/test_slice_pushdown.py +968 -0
- dask_array/tests/test_slice_through_blockwise.py +678 -0
- dask_array/tests/test_slice_through_overlap.py +366 -0
- dask_array/tests/test_slice_through_reshape.py +272 -0
- dask_array/tests/test_slicing.py +839 -0
- dask_array/tests/test_transpose_slice_pushdown.py +208 -0
- dask_array/tests/test_visualize.py +94 -0
- dask_array/tests/test_xarray.py +193 -0
- dask_array-0.1.0.dist-info/METADATA +48 -0
- dask_array-0.1.0.dist-info/RECORD +144 -0
- dask_array-0.1.0.dist-info/WHEEL +4 -0
- dask_array-0.1.0.dist-info/entry_points.txt +2 -0
- dask_array-0.1.0.dist-info/licenses/LICENSE +29 -0
|
@@ -0,0 +1,362 @@
|
|
|
1
|
+
"""Tests for coarse slice pushdown through Blockwise with adjust_chunks.
|
|
2
|
+
|
|
3
|
+
When a Blockwise has adjust_chunks set, we can't push the exact slice through
|
|
4
|
+
because input/output chunk boundaries don't align. However, we CAN still do
|
|
5
|
+
a "coarse" optimization: if the output slice only needs certain blocks, we
|
|
6
|
+
only need the corresponding input blocks.
|
|
7
|
+
|
|
8
|
+
The coarse slice selects whole input blocks, then the original output slice
|
|
9
|
+
trims to the exact elements needed.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import numpy as np
|
|
15
|
+
import pytest
|
|
16
|
+
|
|
17
|
+
import dask_array as da
|
|
18
|
+
from dask_array._test_utils import assert_eq
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def test_coarse_slice_simple():
    """A slice equal to the first output block needs only the first input block.

    ``x`` holds 100 elements in chunks of 10.  ``y`` repeats every element
    twice within each block and is declared with chunks of 20.  ``y[:20]`` is
    exactly output block 0.

    The simplified expression is expected to have the same name as
    ``map_blocks(double, x[:10])`` with no trailing slice, because the
    requested range coincides with one whole output block.
    """
    data = np.arange(100)
    source = da.from_array(data, chunks=10)

    def repeat_twice(block):
        return np.repeat(block, 2)

    def expand(inp):
        # One shared call site so the full and the pre-sliced graphs are
        # built with identical arguments.
        return da.map_blocks(repeat_twice, inp, chunks=(20,), dtype=data.dtype)

    sliced = expand(source)[:20]

    # Reference: slice the input down to block 0 first, then apply the function.
    reference = expand(source[:10])

    assert sliced.expr.simplify()._name == reference.expr.simplify()._name
    assert_eq(sliced, np.repeat(data, 2)[:20])
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def test_coarse_slice_middle_blocks():
    """A block-aligned slice in the middle maps to a block-aligned input slice.

    With output chunks of 20, ``y[40:80]`` is exactly output blocks 2 and 3.
    With input chunks of 10 those correspond to ``x[20:40]``.  The selection
    is two whole blocks (40 elements), so the simplified expression is
    expected to match ``map_blocks(double, x[20:40])`` without an outer slice.
    """
    data = np.arange(100)
    source = da.from_array(data, chunks=10)

    def repeat_twice(block):
        return np.repeat(block, 2)

    doubled = da.map_blocks(repeat_twice, source, chunks=(20,), dtype=data.dtype)
    middle = doubled[40:80]

    # Input blocks 2-3 live at x[20:40].
    input_window = source[20:40]
    reference = da.map_blocks(
        repeat_twice, input_window, chunks=(20,), dtype=data.dtype
    )

    simplified_name = middle.expr.simplify()._name
    reference_name = reference.expr.simplify()._name
    assert simplified_name == reference_name
    assert_eq(middle, np.repeat(data, 2)[40:80])
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def test_coarse_slice_partial_block():
    """A slice that cuts through blocks keeps an outer trim after coarse slicing.

    ``y[30:50]`` touches the second half of output block 1 (``[20:40]``) and
    the first half of output block 2 (``[40:60]``).  The expected form takes
    input blocks 1-2 (``x[10:30]``), applies the function, and then slices
    the 40-element result with ``[10:30]``.
    """
    data = np.arange(100)
    source = da.from_array(data, chunks=10)

    def repeat_twice(block):
        return np.repeat(block, 2)

    map_kwargs = {"chunks": (20,), "dtype": data.dtype}

    doubled = da.map_blocks(repeat_twice, source, **map_kwargs)
    partial = doubled[30:50]

    # Coarse step: whole input blocks 1-2.
    widened = da.map_blocks(repeat_twice, source[10:30], **map_kwargs)
    # Trim step: [30:50] relative to the start of output block 1 (offset 20)
    # becomes [10:30].
    reference = widened[10:30]

    assert partial.expr.simplify()._name == reference.expr.simplify()._name
    assert_eq(partial, np.repeat(data, 2)[30:50])
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def test_coarse_slice_2d_adjusted_axis():
    """2D case: the chunk size changes on axis 0 and the slice is on axis 0.

    The input is 10x10 in 5x5 chunks.  Each block has its rows repeated
    twice, so the output is declared with 10x5 chunks.  ``y[:10, :]`` is the
    first row of output blocks, which corresponds to ``x[:5, :]``.
    """
    grid = np.arange(100).reshape(10, 10)
    source = da.from_array(grid, chunks=(5, 5))

    def double_rows(block):
        return np.repeat(block, 2, axis=0)

    def expand(inp):
        return da.map_blocks(double_rows, inp, chunks=(10, 5), dtype=grid.dtype)

    top = expand(source)[:10, :]

    # Reference: first row of input blocks, then the row-doubling function.
    reference = expand(source[:5, :])

    assert top.expr.simplify()._name == reference.expr.simplify()._name
    assert_eq(top, np.repeat(grid, 2, axis=0)[:10, :])
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def test_coarse_optimization_reduces_tasks():
    """Slicing 10% of the output must shrink the optimized graph substantially."""
    data = np.arange(1000)
    source = da.from_array(data, chunks=10)  # 100 input blocks

    def repeat_twice(block):
        return np.repeat(block, 2)

    def count_tasks(collection):
        # Size of the graph after the collection's own optimization pass.
        return len(collection.optimize().__dask_graph__())

    doubled = da.map_blocks(repeat_twice, source, chunks=(20,), dtype=data.dtype)
    full_count = count_tasks(doubled)

    # First 200 output elements = first 10 of the 100 output blocks.
    sliced_count = count_tasks(doubled[:200])

    # The test requires the sliced graph to be under a third of the full graph.
    threshold = full_count / 3
    assert sliced_count < threshold, (
        f"Expected significant task reduction: {sliced_count} < {threshold}"
    )
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def test_coarse_slice_multi_input():
    """Coarse slicing through a two-input blockwise with ``adjust_chunks``.

    Two 100-element inputs in chunks of 10 are added block by block and each
    sum is repeated twice, doubling the chunk size along ``i``.  The test
    checks the values of the first 20 output elements and that the optimized
    sliced graph has fewer than half the tasks of the full one.
    """
    left_np = np.arange(100)
    right_np = np.arange(100, 200)
    left = da.from_array(left_np, chunks=10)
    right = da.from_array(right_np, chunks=10)

    def combine_double(a, b):
        total = a + b
        return np.repeat(total, 2)

    combined = da.blockwise(
        combine_double,
        "i",
        left,
        "i",
        right,
        "i",
        dtype=left_np.dtype,
        adjust_chunks={"i": lambda c: c * 2},
    )
    head = combined[:20]

    # Value check against the plain NumPy computation.
    assert_eq(head, np.repeat(left_np + right_np, 2)[:20])

    # Graph-size check: sliced graph must be less than half of the full graph.
    full_count = len(combined.optimize().__dask_graph__())
    sliced_count = len(head.optimize().__dask_graph__())
    assert sliced_count < full_count / 2
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def test_coarse_slice_correctness_various():
    """Compare sliced values with NumPy for several aligned and unaligned ranges."""
    data = np.arange(100)
    source = da.from_array(data, chunks=10)

    def repeat_twice(block):
        return np.repeat(block, 2)

    doubled = da.map_blocks(repeat_twice, source, chunks=(20,), dtype=data.dtype)
    reference = np.repeat(data, 2)

    # (start, stop) pairs over the 200-element output, whose chunks are 20 wide.
    bounds = (
        (0, 20),  # first block
        (20, 60),  # blocks 1-2
        (180, 200),  # last block
        (15, 45),  # cuts through block boundaries
        (0, 100),  # first half
        (100, 200),  # second half
    )

    for start, stop in bounds:
        window = slice(start, stop)
        assert_eq(
            doubled[window],
            reference[window],
            err_msg=f"Failed for slice {window}",
        )
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def test_coarse_slice_with_broadcast():
    """Coarse slicing through a blockwise where one input is broadcast.

    A 10x10 array (5x5 chunks, indices ``ij``) is multiplied by a length-10
    vector (chunks of 5, index ``j``), and the rows of each product block are
    repeated twice.  The vector has no ``i`` index, so it is broadcast along
    axis 0.  The test checks the values of the first 10 output rows and that
    the optimized sliced graph is smaller than the full one.
    """
    grid = np.arange(100).reshape(10, 10)
    row = np.arange(10)

    matrix = da.from_array(grid, chunks=(5, 5))
    vector = da.from_array(row, chunks=5)

    def double_rows(a, b):
        product = a * b
        return np.repeat(product, 2, axis=0)

    scaled = da.blockwise(
        double_rows,
        "ij",
        matrix,
        "ij",
        vector,
        "j",  # no "i": the vector is broadcast along axis 0
        dtype=grid.dtype,
        adjust_chunks={"i": lambda c: c * 2},
    )
    assert scaled.shape == (20, 10)

    # First 10 rows = first row of output blocks (chunks are 10 rows tall).
    top = scaled[:10, :]

    reference = np.repeat(grid * row, 2, axis=0)[:10, :]
    assert_eq(top, reference)

    full_count = len(scaled.optimize().__dask_graph__())
    sliced_count = len(top.optimize().__dask_graph__())
    assert sliced_count < full_count
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
def test_coarse_slice_dimension_reorder():
    """Coarse slicing through a blockwise whose output indices are transposed.

    The input uses indices ``ij`` and the output ``ji``, so output axis 0 is
    the input's ``j`` axis.  Each block is transposed and its rows repeated
    twice, with ``adjust_chunks`` doubling ``j``.  Output block ranges must
    be mapped back to the swapped input block ranges for the values to match.
    """
    grid = np.arange(100).reshape(10, 10)
    source = da.from_array(grid, chunks=(5, 5))

    def transpose_double(block):
        flipped = block.T
        return np.repeat(flipped, 2, axis=0)

    swapped = da.blockwise(
        transpose_double,
        "ji",  # transposed output indices
        source,
        "ij",
        dtype=grid.dtype,
        adjust_chunks={"j": lambda c: c * 2},  # j is output axis 0
    )
    assert swapped.shape == (20, 10)

    reference = np.repeat(grid.T, 2, axis=0)

    # Description -> index tuple; the first slices axis 0 only, the second both axes.
    cases = {
        "first 10 rows": np.s_[:10, :],
        "first quadrant": np.s_[:10, :5],
    }
    for desc, index in cases.items():
        assert_eq(swapped[index], reference[index], err_msg=f"Failed for {desc}")

    # The optimized graph for one quadrant must be smaller than the full graph.
    full_count = len(swapped.optimize().__dask_graph__())
    sliced_count = len(swapped[:10, :5].optimize().__dask_graph__())
    assert sliced_count < full_count
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
def test_coarse_slice_tuple_adjust_chunks():
    """Coarse slicing when ``adjust_chunks`` is an explicit per-block tuple.

    Here ``adjust_chunks={"i": (5, 5, 5)}`` gives the output chunk size for
    each of the three blocks.  Selecting a subset of blocks means the tuple
    has to be narrowed to the selected blocks as well.
    """
    data = np.arange(30)
    source = da.from_array(data, chunks=10)  # 3 blocks of 10

    def shrink_first(block):
        # Keep only the first five elements of each block.
        return block[:5]

    per_block_sizes = (5, 5, 5)
    shrunk = da.blockwise(
        shrink_first,
        "i",
        source,
        "i",
        dtype=data.dtype,
        adjust_chunks={"i": per_block_sizes},
    )
    assert shrunk.shape == (15,)
    assert shrunk.chunks == ((5, 5, 5),)

    # Output elements 5:15 are exactly output blocks 1 and 2.
    tail = shrunk[5:]

    # Those blocks hold the first five elements of input blocks 1 and 2,
    # i.e. arr[10:15] followed by arr[20:25].
    reference = np.concatenate([data[10:15], data[20:25]])
    assert_eq(tail, reference)

    # The optimized sliced graph must be smaller than the full graph.
    full_count = len(shrunk.optimize().__dask_graph__())
    sliced_count = len(tail.optimize().__dask_graph__())
    assert sliced_count < full_count
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
def test_coarse_slice_irregular_chunks():
    """Coarse slicing when the output chunks are not all the same size.

    Ten uniform input blocks of 10 elements are expanded by a block-dependent
    factor, giving output chunks that alternate 20, 30, 20, 30, ...
    """
    data = np.arange(100)
    source = da.from_array(data, chunks=10)  # 10 uniform input blocks

    def expand_variable(block):
        # Blocks whose first value is a multiple of 20 are doubled (-> 20
        # elements); all other blocks are tripled (-> 30 elements).
        if block[0] % 20 == 0:
            return np.repeat(block, 2)
        return np.repeat(block, 3)

    # Declared output chunk sizes: 20 for even block indices, 30 for odd ones.
    output_chunks = (20, 30) * 5
    expanded = da.blockwise(
        expand_variable,
        "i",
        source,
        "i",
        dtype=data.dtype,
        adjust_chunks={"i": output_chunks},
    )
    assert expanded.chunks == (output_chunks,)

    # Output block 0 is [0:20], block 1 is [20:50], block 2 is [50:70], so
    # [20:70] is exactly blocks 1 and 2.
    middle = expanded[20:70]

    reference = np.concatenate(
        [
            np.repeat(data[10:20], 3),  # block 1: tripled, 30 elements
            np.repeat(data[20:30], 2),  # block 2: doubled, 20 elements
        ]
    )
    assert_eq(middle, reference)

    # The optimized sliced graph must be smaller than the full graph.
    full_count = len(expanded.optimize().__dask_graph__())
    sliced_count = len(middle.optimize().__dask_graph__())
    assert sliced_count < full_count
|