dask-array 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144)
  1. dask_array/__init__.py +228 -0
  2. dask_array/_backends.py +76 -0
  3. dask_array/_backends_array.py +99 -0
  4. dask_array/_blockwise.py +1410 -0
  5. dask_array/_broadcast.py +272 -0
  6. dask_array/_chunk.py +445 -0
  7. dask_array/_chunk_types.py +54 -0
  8. dask_array/_collection.py +1644 -0
  9. dask_array/_concatenate.py +331 -0
  10. dask_array/_core_utils.py +1365 -0
  11. dask_array/_dispatch.py +141 -0
  12. dask_array/_einsum.py +277 -0
  13. dask_array/_expr.py +544 -0
  14. dask_array/_expr_flow.py +586 -0
  15. dask_array/_gufunc.py +805 -0
  16. dask_array/_histogram.py +617 -0
  17. dask_array/_map_blocks.py +652 -0
  18. dask_array/_new_collection.py +10 -0
  19. dask_array/_numpy_compat.py +135 -0
  20. dask_array/_overlap.py +1159 -0
  21. dask_array/_rechunk.py +1050 -0
  22. dask_array/_reshape.py +710 -0
  23. dask_array/_routines.py +102 -0
  24. dask_array/_shuffle.py +448 -0
  25. dask_array/_stack.py +264 -0
  26. dask_array/_svg.py +291 -0
  27. dask_array/_templates.py +29 -0
  28. dask_array/_test_utils.py +257 -0
  29. dask_array/_ufunc.py +385 -0
  30. dask_array/_utils.py +349 -0
  31. dask_array/_visualize.py +223 -0
  32. dask_array/_xarray.py +337 -0
  33. dask_array/core/__init__.py +34 -0
  34. dask_array/core/_blockwise_funcs.py +312 -0
  35. dask_array/core/_conversion.py +422 -0
  36. dask_array/core/_from_graph.py +97 -0
  37. dask_array/creation/__init__.py +71 -0
  38. dask_array/creation/_arange.py +121 -0
  39. dask_array/creation/_diag.py +116 -0
  40. dask_array/creation/_diagonal.py +241 -0
  41. dask_array/creation/_eye.py +103 -0
  42. dask_array/creation/_linspace.py +102 -0
  43. dask_array/creation/_mesh.py +134 -0
  44. dask_array/creation/_ones_zeros.py +454 -0
  45. dask_array/creation/_pad.py +270 -0
  46. dask_array/creation/_repeat.py +55 -0
  47. dask_array/creation/_tile.py +36 -0
  48. dask_array/creation/_tri.py +28 -0
  49. dask_array/creation/_utils.py +296 -0
  50. dask_array/fft.py +320 -0
  51. dask_array/io/__init__.py +39 -0
  52. dask_array/io/_base.py +10 -0
  53. dask_array/io/_from_array.py +257 -0
  54. dask_array/io/_from_delayed.py +95 -0
  55. dask_array/io/_from_graph.py +54 -0
  56. dask_array/io/_from_npy_stack.py +67 -0
  57. dask_array/io/_store.py +336 -0
  58. dask_array/io/_tiledb.py +159 -0
  59. dask_array/io/_to_npy_stack.py +65 -0
  60. dask_array/io/_zarr.py +449 -0
  61. dask_array/linalg/__init__.py +39 -0
  62. dask_array/linalg/_cholesky.py +234 -0
  63. dask_array/linalg/_lu.py +300 -0
  64. dask_array/linalg/_norm.py +94 -0
  65. dask_array/linalg/_qr.py +601 -0
  66. dask_array/linalg/_solve.py +349 -0
  67. dask_array/linalg/_svd.py +394 -0
  68. dask_array/linalg/_tensordot.py +334 -0
  69. dask_array/linalg/_utils.py +74 -0
  70. dask_array/manipulation/__init__.py +45 -0
  71. dask_array/manipulation/_expand.py +321 -0
  72. dask_array/manipulation/_flip.py +92 -0
  73. dask_array/manipulation/_roll.py +78 -0
  74. dask_array/manipulation/_transpose.py +309 -0
  75. dask_array/random/__init__.py +125 -0
  76. dask_array/random/_choice.py +181 -0
  77. dask_array/random/_expr.py +256 -0
  78. dask_array/random/_generator.py +441 -0
  79. dask_array/random/_random_state.py +259 -0
  80. dask_array/random/_utils.py +84 -0
  81. dask_array/reductions/__init__.py +84 -0
  82. dask_array/reductions/_arg_reduction.py +130 -0
  83. dask_array/reductions/_common.py +1082 -0
  84. dask_array/reductions/_cumulative.py +522 -0
  85. dask_array/reductions/_percentile.py +261 -0
  86. dask_array/reductions/_reduction.py +725 -0
  87. dask_array/reductions/_trace.py +56 -0
  88. dask_array/routines/__init__.py +133 -0
  89. dask_array/routines/_apply.py +84 -0
  90. dask_array/routines/_bincount.py +112 -0
  91. dask_array/routines/_broadcast.py +111 -0
  92. dask_array/routines/_coarsen.py +115 -0
  93. dask_array/routines/_diff.py +79 -0
  94. dask_array/routines/_gradient.py +158 -0
  95. dask_array/routines/_indexing.py +65 -0
  96. dask_array/routines/_insert_delete.py +132 -0
  97. dask_array/routines/_misc.py +122 -0
  98. dask_array/routines/_nonzero.py +72 -0
  99. dask_array/routines/_search.py +123 -0
  100. dask_array/routines/_select.py +113 -0
  101. dask_array/routines/_statistics.py +171 -0
  102. dask_array/routines/_topk.py +82 -0
  103. dask_array/routines/_triangular.py +74 -0
  104. dask_array/routines/_unique.py +232 -0
  105. dask_array/routines/_where.py +62 -0
  106. dask_array/slicing/__init__.py +67 -0
  107. dask_array/slicing/_basic.py +550 -0
  108. dask_array/slicing/_blocks.py +138 -0
  109. dask_array/slicing/_bool_index.py +145 -0
  110. dask_array/slicing/_setitem.py +329 -0
  111. dask_array/slicing/_squeeze.py +101 -0
  112. dask_array/slicing/_utils.py +1133 -0
  113. dask_array/slicing/_vindex.py +282 -0
  114. dask_array/stacking/__init__.py +15 -0
  115. dask_array/stacking/_block.py +83 -0
  116. dask_array/stacking/_simple.py +58 -0
  117. dask_array/templates/array.html.j2 +48 -0
  118. dask_array/tests/__init__.py +0 -0
  119. dask_array/tests/conftest.py +22 -0
  120. dask_array/tests/test_api.py +40 -0
  121. dask_array/tests/test_binary_op_chunks.py +107 -0
  122. dask_array/tests/test_coarse_slice_through_blockwise.py +362 -0
  123. dask_array/tests/test_collection.py +799 -0
  124. dask_array/tests/test_creation.py +1102 -0
  125. dask_array/tests/test_expr_flow.py +143 -0
  126. dask_array/tests/test_linalg.py +1130 -0
  127. dask_array/tests/test_map_blocks_multi_output.py +104 -0
  128. dask_array/tests/test_rechunk_pushdown.py +214 -0
  129. dask_array/tests/test_reductions.py +1091 -0
  130. dask_array/tests/test_routines.py +2853 -0
  131. dask_array/tests/test_shuffle_chunks.py +67 -0
  132. dask_array/tests/test_slice_pushdown.py +968 -0
  133. dask_array/tests/test_slice_through_blockwise.py +678 -0
  134. dask_array/tests/test_slice_through_overlap.py +366 -0
  135. dask_array/tests/test_slice_through_reshape.py +272 -0
  136. dask_array/tests/test_slicing.py +839 -0
  137. dask_array/tests/test_transpose_slice_pushdown.py +208 -0
  138. dask_array/tests/test_visualize.py +94 -0
  139. dask_array/tests/test_xarray.py +193 -0
  140. dask_array-0.1.0.dist-info/METADATA +48 -0
  141. dask_array-0.1.0.dist-info/RECORD +144 -0
  142. dask_array-0.1.0.dist-info/WHEEL +4 -0
  143. dask_array-0.1.0.dist-info/entry_points.txt +2 -0
  144. dask_array-0.1.0.dist-info/licenses/LICENSE +29 -0
@@ -0,0 +1,362 @@
1
+ """Tests for coarse slice pushdown through Blockwise with adjust_chunks.
2
+
3
+ When a Blockwise has adjust_chunks set, we can't push the exact slice through
4
+ because input/output chunk boundaries don't align. However, we CAN still do
5
+ a "coarse" optimization: if the output slice only needs certain blocks, we
6
+ only need the corresponding input blocks.
7
+
8
+ The coarse slice selects whole input blocks, then the original output slice
9
+ trims to the exact elements needed.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import numpy as np
15
+ import pytest
16
+
17
+ import dask_array as da
18
+ from dask_array._test_utils import assert_eq
19
+
20
+
21
def test_coarse_slice_simple():
    """A slice covering exactly the first output block needs one input block.

    The input is 100 elements in chunks of 10; ``map_blocks`` doubles every
    block, with the output chunks declared as 20.  ``[:20]`` of the output
    lies entirely inside output block 0, so the simplified expression is
    expected to carry the same name as ``map_blocks`` applied to ``x[:10]``.
    The slice ends on a block boundary, so no trailing trim is expected.
    """

    def double_elements(block):
        return np.repeat(block, 2)

    source = np.arange(100)
    chunked = da.from_array(source, chunks=10)

    def apply_doubling(array):
        # Same map_blocks call for both the full and the pre-sliced input.
        return da.map_blocks(double_elements, array, chunks=(20,), dtype=source.dtype)

    result = apply_doubling(chunked)[:20]

    # Reference expression: slice the input to block 0 first, then map.
    expected = apply_doubling(chunked[:10])

    assert result.expr.simplify()._name == expected.expr.simplify()._name
    assert_eq(result, np.repeat(source, 2)[:20])
45
+
46
+
47
def test_coarse_slice_middle_blocks():
    """A block-aligned slice in the middle maps onto the matching input blocks.

    Output elements ``[40:80]`` are output blocks 2 and 3 (20 elements each),
    which correspond to input blocks 2 and 3, i.e. ``x[20:40]``.  Both ends of
    the slice sit on block boundaries, so the simplified expression is
    expected to equal ``map_blocks`` over ``x[20:40]`` with no outer slice.
    """

    def double_elements(block):
        return np.repeat(block, 2)

    source = np.arange(100)
    chunked = da.from_array(source, chunks=10)

    def apply_doubling(array):
        return da.map_blocks(double_elements, array, chunks=(20,), dtype=source.dtype)

    result = apply_doubling(chunked)[40:80]

    # Reference expression built from input blocks 2-3 only.
    expected = apply_doubling(chunked[20:40])

    assert result.expr.simplify()._name == expected.expr.simplify()._name
    assert_eq(result, np.repeat(source, 2)[40:80])
68
+
69
+
70
def test_coarse_slice_partial_block():
    """A slice that cuts through blocks keeps an outer trim after the coarse slice.

    ``y[30:50]`` covers the second half of output block 1 (``[20:40]``) and
    the first half of output block 2 (``[40:60]``).  The expected form is
    therefore: take input blocks 1-2 (``x[10:30]``), map them, and then trim
    the 40-element result with ``[10:30]`` (the original bounds shifted by the
    start offset of block 1, which is 20).
    """

    def double_elements(block):
        return np.repeat(block, 2)

    source = np.arange(100)
    chunked = da.from_array(source, chunks=10)

    def apply_doubling(array):
        return da.map_blocks(double_elements, array, chunks=(20,), dtype=source.dtype)

    result = apply_doubling(chunked)[30:50]

    # Whole input blocks 1-2, mapped, then trimmed to the requested elements.
    expected = apply_doubling(chunked[10:30])[10:30]

    assert result.expr.simplify()._name == expected.expr.simplify()._name
    assert_eq(result, np.repeat(source, 2)[30:50])
94
+
95
+
96
def test_coarse_slice_2d_adjusted_axis():
    """Coarse slicing along the axis whose chunk sizes the blockwise changes.

    A 10x10 array in 5x5 chunks has its rows doubled per block (output chunks
    declared as ``(10, 5)``).  ``y[:10, :]`` is the first row of output
    blocks, which corresponds to the first row of input blocks, ``x[:5, :]``.
    """

    def double_rows(block):
        return np.repeat(block, 2, axis=0)

    grid = np.arange(100).reshape(10, 10)
    chunked = da.from_array(grid, chunks=(5, 5))

    def apply_row_doubling(array):
        return da.map_blocks(double_rows, array, chunks=(10, 5), dtype=grid.dtype)

    result = apply_row_doubling(chunked)[:10, :]

    # Reference expression: only the first row of input blocks is mapped.
    expected = apply_row_doubling(chunked[:5, :])

    assert result.expr.simplify()._name == expected.expr.simplify()._name
    assert_eq(result, np.repeat(grid, 2, axis=0)[:10, :])
115
+
116
+
117
def test_coarse_optimization_reduces_tasks():
    """Slicing 10% of the output must shrink the optimized graph substantially.

    The input has 100 blocks of 10 elements.  The optimized graph of the full
    doubled array is compared against the optimized graph of its first 200
    elements (10 of 100 output blocks); the sliced graph must be smaller than
    one third of the full one.
    """

    def double_elements(block):
        return np.repeat(block, 2)

    source = np.arange(1000)
    chunked = da.from_array(source, chunks=10)  # 100 input blocks
    doubled = da.map_blocks(double_elements, chunked, chunks=(20,), dtype=source.dtype)

    def optimized_task_count(collection):
        return len(collection.optimize().__dask_graph__())

    full_tasks = optimized_task_count(doubled)
    sliced_tasks = optimized_task_count(doubled[:200])

    # One third is a deliberately loose bound for a 10% selection.
    threshold = full_tasks / 3
    assert sliced_tasks < threshold, f"Expected significant task reduction: {sliced_tasks} < {threshold}"
137
+
138
+
139
def test_coarse_slice_multi_input():
    """Coarse slicing through a blockwise that consumes two arrays.

    Two 100-element inputs (chunks of 10) are added block by block and the
    sum is doubled in length via ``adjust_chunks``.  The test checks the
    values of the first 20 output elements and that the optimized graph for
    that slice has fewer than half the tasks of the full graph.
    """

    def combine_double(a, b):
        return np.repeat(a + b, 2)

    left = np.arange(100)
    right = np.arange(100, 200)
    x = da.from_array(left, chunks=10)
    y = da.from_array(right, chunks=10)

    z = da.blockwise(
        combine_double,
        "i",
        x,
        "i",
        y,
        "i",
        dtype=left.dtype,
        adjust_chunks={"i": lambda c: c * 2},
    )
    result = z[:20]

    # Values first ...
    assert_eq(result, np.repeat(left + right, 2)[:20])

    # ... then graph size: the slice must need fewer than half the tasks.
    def optimized_task_count(collection):
        return len(collection.optimize().__dask_graph__())

    full_tasks = optimized_task_count(z)
    sliced_tasks = optimized_task_count(result)
    assert sliced_tasks < full_tasks / 2
171
+
172
+
173
def test_coarse_slice_correctness_various():
    """Check computed values for a range of slices of the doubled array.

    Only values are compared here (against ``np.repeat`` on the NumPy
    source); the shape of the optimized expression is not inspected.
    """

    def double_elements(block):
        return np.repeat(block, 2)

    source = np.arange(100)
    chunked = da.from_array(source, chunks=10)
    y = da.map_blocks(double_elements, chunked, chunks=(20,), dtype=source.dtype)
    expected_full = np.repeat(source, 2)

    slices = (
        slice(0, 20),  # first output block
        slice(20, 60),  # output blocks 1-2
        slice(180, 200),  # last output block
        slice(15, 45),  # bounds inside blocks
        slice(0, 100),  # first half
        slice(100, 200),  # second half
    )

    for slc in slices:
        assert_eq(y[slc], expected_full[slc], err_msg=f"Failed for slice {slc}")
197
+
198
+
199
def test_coarse_slice_with_broadcast():
    """Coarse slicing through a blockwise where one input is broadcast.

    A 10x10 array (5x5 chunks) is multiplied by a length-10 vector indexed
    only by ``j`` and the product's rows are doubled via ``adjust_chunks``
    on ``i``.  The test checks the values of the first 10 output rows and
    that the optimized graph for that slice is smaller than the full graph.
    """

    def double_rows(a, b):
        return np.repeat(a * b, 2, axis=0)

    matrix = np.arange(100).reshape(10, 10)
    row_vector = np.arange(10)

    x = da.from_array(matrix, chunks=(5, 5))
    v = da.from_array(row_vector, chunks=5)

    z = da.blockwise(
        double_rows,
        "ij",
        x,
        "ij",
        v,
        "j",  # no "i" index: v is shared across block rows
        dtype=matrix.dtype,
        adjust_chunks={"i": lambda c: c * 2},
    )
    assert z.shape == (20, 10)

    # The first 10 rows are exactly output block row 0.
    result = z[:10, :]
    assert_eq(result, np.repeat(matrix * row_vector, 2, axis=0)[:10, :])

    def optimized_task_count(collection):
        return len(collection.optimize().__dask_graph__())

    full_tasks = optimized_task_count(z)
    sliced_tasks = optimized_task_count(result)
    assert sliced_tasks < full_tasks
239
+
240
+
241
def test_coarse_slice_dimension_reorder():
    """Coarse slicing through a blockwise whose output indices are reordered.

    The blockwise maps input indices ``ij`` to output indices ``ji`` and each
    block is transposed and has its rows doubled, so output axis 0 (index
    ``j``) is the adjusted one.  Values are checked for a slice on axis 0 only
    and for a slice on both axes; the two-axis slice must also produce a
    smaller optimized graph than the full array.
    """

    def transpose_double(block):
        return np.repeat(block.T, 2, axis=0)

    grid = np.arange(100).reshape(10, 10)
    x = da.from_array(grid, chunks=(5, 5))

    z = da.blockwise(
        transpose_double,
        "ji",  # output index order is the reverse of the input's
        x,
        "ij",
        dtype=grid.dtype,
        adjust_chunks={"j": lambda c: c * 2},  # j is output axis 0
    )
    assert z.shape == (20, 10)

    expected_full = np.repeat(grid.T, 2, axis=0)

    cases = (
        ((slice(None, 10), slice(None)), "first 10 rows"),
        ((slice(None, 10), slice(None, 5)), "first quadrant"),
    )
    for slc, desc in cases:
        assert_eq(z[slc], expected_full[slc], err_msg=f"Failed for {desc}")

    def optimized_task_count(collection):
        return len(collection.optimize().__dask_graph__())

    full_tasks = optimized_task_count(z)
    sliced_tasks = optimized_task_count(z[:10, :5])
    assert sliced_tasks < full_tasks
279
+
280
+
281
def test_coarse_slice_tuple_adjust_chunks():
    """Coarse slicing when ``adjust_chunks`` is given as an explicit tuple.

    Three input blocks of 10 elements are each cut down to their first 5
    elements, with the output chunk sizes spelled out as ``(5, 5, 5)``.
    Dropping the first output block (``y[5:]``) must still give the right
    values and must need fewer tasks than the full array.
    """

    def shrink_first(block):
        # Keep only the leading 5 elements of each block.
        return block[:5]

    source = np.arange(30)
    x = da.from_array(source, chunks=10)  # 3 blocks of 10

    y = da.blockwise(
        shrink_first,
        "i",
        x,
        "i",
        dtype=source.dtype,
        adjust_chunks={"i": (5, 5, 5)},  # one explicit size per output block
    )
    assert y.shape == (15,)
    assert y.chunks == ((5, 5, 5),)

    # Output elements 5:15 are output blocks 1 and 2.
    result = y[5:]

    # Block 1 contributes source[10:15], block 2 contributes source[20:25].
    expected = np.concatenate([source[10:15], source[20:25]])
    assert_eq(result, expected)

    def optimized_task_count(collection):
        return len(collection.optimize().__dask_graph__())

    full_tasks = optimized_task_count(y)
    sliced_tasks = optimized_task_count(result)
    assert sliced_tasks < full_tasks
319
+
320
+
321
def test_coarse_slice_irregular_chunks():
    """Coarse slicing when the output chunk sizes are not uniform.

    Ten uniform input blocks of 10 elements are expanded by alternating
    factors, giving output chunks of 20, 30, 20, 30, ...  The slice
    ``y[20:70]`` skips output block 0 and takes output blocks 1 and 2 in
    full; values are checked and the optimized graph must be smaller than
    the full one.
    """

    def expand_variable(block):
        # Blocks starting at a multiple of 20 are doubled, the others tripled.
        factor = 2 if block[0] % 20 == 0 else 3
        return np.repeat(block, factor)

    source = np.arange(100)
    x = da.from_array(source, chunks=10)  # 10 uniform input blocks

    # Even-numbered blocks produce 20 elements, odd-numbered ones 30.
    output_chunks = (20, 30) * 5
    y = da.blockwise(
        expand_variable,
        "i",
        x,
        "i",
        dtype=source.dtype,
        adjust_chunks={"i": output_chunks},
    )
    assert y.chunks == (output_chunks,)

    # Block 0 is [0:20], block 1 is [20:50], block 2 is [50:70].
    result = y[20:70]

    expected = np.concatenate(
        [
            np.repeat(source[10:20], 3),  # block 1: all 30 elements
            np.repeat(source[20:30], 2),  # block 2: all 20 elements
        ]
    )
    assert_eq(result, expected)

    def optimized_task_count(collection):
        return len(collection.optimize().__dask_graph__())

    full_tasks = optimized_task_count(y)
    sliced_tasks = optimized_task_count(result)
    assert sliced_tasks < full_tasks