dask-array 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144) hide show
  1. dask_array/__init__.py +228 -0
  2. dask_array/_backends.py +76 -0
  3. dask_array/_backends_array.py +99 -0
  4. dask_array/_blockwise.py +1410 -0
  5. dask_array/_broadcast.py +272 -0
  6. dask_array/_chunk.py +445 -0
  7. dask_array/_chunk_types.py +54 -0
  8. dask_array/_collection.py +1644 -0
  9. dask_array/_concatenate.py +331 -0
  10. dask_array/_core_utils.py +1365 -0
  11. dask_array/_dispatch.py +141 -0
  12. dask_array/_einsum.py +277 -0
  13. dask_array/_expr.py +544 -0
  14. dask_array/_expr_flow.py +586 -0
  15. dask_array/_gufunc.py +805 -0
  16. dask_array/_histogram.py +617 -0
  17. dask_array/_map_blocks.py +652 -0
  18. dask_array/_new_collection.py +10 -0
  19. dask_array/_numpy_compat.py +135 -0
  20. dask_array/_overlap.py +1159 -0
  21. dask_array/_rechunk.py +1050 -0
  22. dask_array/_reshape.py +710 -0
  23. dask_array/_routines.py +102 -0
  24. dask_array/_shuffle.py +448 -0
  25. dask_array/_stack.py +264 -0
  26. dask_array/_svg.py +291 -0
  27. dask_array/_templates.py +29 -0
  28. dask_array/_test_utils.py +257 -0
  29. dask_array/_ufunc.py +385 -0
  30. dask_array/_utils.py +349 -0
  31. dask_array/_visualize.py +223 -0
  32. dask_array/_xarray.py +337 -0
  33. dask_array/core/__init__.py +34 -0
  34. dask_array/core/_blockwise_funcs.py +312 -0
  35. dask_array/core/_conversion.py +422 -0
  36. dask_array/core/_from_graph.py +97 -0
  37. dask_array/creation/__init__.py +71 -0
  38. dask_array/creation/_arange.py +121 -0
  39. dask_array/creation/_diag.py +116 -0
  40. dask_array/creation/_diagonal.py +241 -0
  41. dask_array/creation/_eye.py +103 -0
  42. dask_array/creation/_linspace.py +102 -0
  43. dask_array/creation/_mesh.py +134 -0
  44. dask_array/creation/_ones_zeros.py +454 -0
  45. dask_array/creation/_pad.py +270 -0
  46. dask_array/creation/_repeat.py +55 -0
  47. dask_array/creation/_tile.py +36 -0
  48. dask_array/creation/_tri.py +28 -0
  49. dask_array/creation/_utils.py +296 -0
  50. dask_array/fft.py +320 -0
  51. dask_array/io/__init__.py +39 -0
  52. dask_array/io/_base.py +10 -0
  53. dask_array/io/_from_array.py +257 -0
  54. dask_array/io/_from_delayed.py +95 -0
  55. dask_array/io/_from_graph.py +54 -0
  56. dask_array/io/_from_npy_stack.py +67 -0
  57. dask_array/io/_store.py +336 -0
  58. dask_array/io/_tiledb.py +159 -0
  59. dask_array/io/_to_npy_stack.py +65 -0
  60. dask_array/io/_zarr.py +449 -0
  61. dask_array/linalg/__init__.py +39 -0
  62. dask_array/linalg/_cholesky.py +234 -0
  63. dask_array/linalg/_lu.py +300 -0
  64. dask_array/linalg/_norm.py +94 -0
  65. dask_array/linalg/_qr.py +601 -0
  66. dask_array/linalg/_solve.py +349 -0
  67. dask_array/linalg/_svd.py +394 -0
  68. dask_array/linalg/_tensordot.py +334 -0
  69. dask_array/linalg/_utils.py +74 -0
  70. dask_array/manipulation/__init__.py +45 -0
  71. dask_array/manipulation/_expand.py +321 -0
  72. dask_array/manipulation/_flip.py +92 -0
  73. dask_array/manipulation/_roll.py +78 -0
  74. dask_array/manipulation/_transpose.py +309 -0
  75. dask_array/random/__init__.py +125 -0
  76. dask_array/random/_choice.py +181 -0
  77. dask_array/random/_expr.py +256 -0
  78. dask_array/random/_generator.py +441 -0
  79. dask_array/random/_random_state.py +259 -0
  80. dask_array/random/_utils.py +84 -0
  81. dask_array/reductions/__init__.py +84 -0
  82. dask_array/reductions/_arg_reduction.py +130 -0
  83. dask_array/reductions/_common.py +1082 -0
  84. dask_array/reductions/_cumulative.py +522 -0
  85. dask_array/reductions/_percentile.py +261 -0
  86. dask_array/reductions/_reduction.py +725 -0
  87. dask_array/reductions/_trace.py +56 -0
  88. dask_array/routines/__init__.py +133 -0
  89. dask_array/routines/_apply.py +84 -0
  90. dask_array/routines/_bincount.py +112 -0
  91. dask_array/routines/_broadcast.py +111 -0
  92. dask_array/routines/_coarsen.py +115 -0
  93. dask_array/routines/_diff.py +79 -0
  94. dask_array/routines/_gradient.py +158 -0
  95. dask_array/routines/_indexing.py +65 -0
  96. dask_array/routines/_insert_delete.py +132 -0
  97. dask_array/routines/_misc.py +122 -0
  98. dask_array/routines/_nonzero.py +72 -0
  99. dask_array/routines/_search.py +123 -0
  100. dask_array/routines/_select.py +113 -0
  101. dask_array/routines/_statistics.py +171 -0
  102. dask_array/routines/_topk.py +82 -0
  103. dask_array/routines/_triangular.py +74 -0
  104. dask_array/routines/_unique.py +232 -0
  105. dask_array/routines/_where.py +62 -0
  106. dask_array/slicing/__init__.py +67 -0
  107. dask_array/slicing/_basic.py +550 -0
  108. dask_array/slicing/_blocks.py +138 -0
  109. dask_array/slicing/_bool_index.py +145 -0
  110. dask_array/slicing/_setitem.py +329 -0
  111. dask_array/slicing/_squeeze.py +101 -0
  112. dask_array/slicing/_utils.py +1133 -0
  113. dask_array/slicing/_vindex.py +282 -0
  114. dask_array/stacking/__init__.py +15 -0
  115. dask_array/stacking/_block.py +83 -0
  116. dask_array/stacking/_simple.py +58 -0
  117. dask_array/templates/array.html.j2 +48 -0
  118. dask_array/tests/__init__.py +0 -0
  119. dask_array/tests/conftest.py +22 -0
  120. dask_array/tests/test_api.py +40 -0
  121. dask_array/tests/test_binary_op_chunks.py +107 -0
  122. dask_array/tests/test_coarse_slice_through_blockwise.py +362 -0
  123. dask_array/tests/test_collection.py +799 -0
  124. dask_array/tests/test_creation.py +1102 -0
  125. dask_array/tests/test_expr_flow.py +143 -0
  126. dask_array/tests/test_linalg.py +1130 -0
  127. dask_array/tests/test_map_blocks_multi_output.py +104 -0
  128. dask_array/tests/test_rechunk_pushdown.py +214 -0
  129. dask_array/tests/test_reductions.py +1091 -0
  130. dask_array/tests/test_routines.py +2853 -0
  131. dask_array/tests/test_shuffle_chunks.py +67 -0
  132. dask_array/tests/test_slice_pushdown.py +968 -0
  133. dask_array/tests/test_slice_through_blockwise.py +678 -0
  134. dask_array/tests/test_slice_through_overlap.py +366 -0
  135. dask_array/tests/test_slice_through_reshape.py +272 -0
  136. dask_array/tests/test_slicing.py +839 -0
  137. dask_array/tests/test_transpose_slice_pushdown.py +208 -0
  138. dask_array/tests/test_visualize.py +94 -0
  139. dask_array/tests/test_xarray.py +193 -0
  140. dask_array-0.1.0.dist-info/METADATA +48 -0
  141. dask_array-0.1.0.dist-info/RECORD +144 -0
  142. dask_array-0.1.0.dist-info/WHEEL +4 -0
  143. dask_array-0.1.0.dist-info/entry_points.txt +2 -0
  144. dask_array-0.1.0.dist-info/licenses/LICENSE +29 -0
@@ -0,0 +1,839 @@
1
+ from __future__ import annotations
2
+
3
+ import itertools
4
+ import warnings
5
+
6
+ import pytest
7
+
8
+ from dask._task_spec import Alias, Task, TaskRef
9
+ from dask.delayed import delayed
10
+
11
+ np = pytest.importorskip("numpy")
12
+
13
+ import dask
14
+ import dask_array as da
15
+ from dask_array._chunk import getitem
16
+
17
+ local_getitem = getitem # alias for tests that check internal implementation
18
+ from dask_array.slicing import normalize_index
19
+ from dask_array.slicing._utils import (
20
+ SlicingNoop,
21
+ _sanitize_index_element,
22
+ _slice_1d,
23
+ make_block_sorted_slices,
24
+ new_blockdim,
25
+ sanitize_index,
26
+ shuffle_slice,
27
+ )
28
+ from dask_array._test_utils import assert_eq, same_keys
29
+
30
+
31
+ # Legacy helper functions for testing - these test dask.array internals, not dask_array expressions
32
+ # These functions have different signatures/behavior than the expression-based versions in dask_array.slicing
33
+
34
+
35
def test_slice_1d():
    """Check the per-chunk slice plan produced by ``_slice_1d``.

    Each case calls ``_slice_1d(dim_shape, chunk_lengths, index)`` and compares
    the returned ``{chunk number: slice inside that chunk}`` mapping with a
    hand-written plan.
    """
    cases = [
        (
            "x[10:51]",
            100,
            [25] * 4,
            slice(10, 51, None),
            {0: slice(10, 25, 1), 1: slice(None, None, None), 2: slice(0, 1, 1)},
        ),
        (
            "x[100:12:-3]",
            100,
            [20] * 5,
            slice(100, 12, -3),
            {
                0: slice(-2, -8, -3),
                1: slice(-1, -21, -3),
                2: slice(-3, -21, -3),
                3: slice(-2, -21, -3),
                4: slice(-1, -21, -3),
            },
        ),
        (
            "x[102::-3]",
            100,
            [20] * 5,
            slice(102, None, -3),
            {
                0: slice(-2, -21, -3),
                1: slice(-1, -21, -3),
                2: slice(-3, -21, -3),
                3: slice(-2, -21, -3),
                4: slice(-1, -21, -3),
            },
        ),
        (
            "x[::-4]",
            100,
            [20] * 5,
            slice(None, None, -4),
            {chunk: slice(-1, -21, -4) for chunk in range(5)},
        ),
        (
            "x[::-7]",
            100,
            [20] * 5,
            slice(None, None, -7),
            {
                0: slice(-5, -21, -7),
                1: slice(-4, -21, -7),
                2: slice(-3, -21, -7),
                3: slice(-2, -21, -7),
                4: slice(-1, -21, -7),
            },
        ),
        (
            "x=range(115); x[::-7]",
            115,
            [23] * 5,
            slice(None, None, -7),
            {
                0: slice(-7, -24, -7),
                1: slice(-2, -24, -7),
                2: slice(-4, -24, -7),
                3: slice(-6, -24, -7),
                4: slice(-1, -24, -7),
            },
        ),
        (
            "x[79::-3]",
            100,
            [20] * 5,
            slice(79, None, -3),
            {
                0: slice(-1, -21, -3),
                1: slice(-3, -21, -3),
                2: slice(-2, -21, -3),
                3: slice(-1, -21, -3),
            },
        ),
        (
            "x[-1:-8:-1]",
            100,
            [20, 20, 20, 20, 20],
            slice(-1, 92, -1),
            {4: slice(-1, -8, -1)},
        ),
        (
            "x[20:0:-1]",
            100,
            [20, 20, 20, 20, 20],
            slice(20, 0, -1),
            {0: slice(-1, -20, -1), 1: slice(-20, -21, -1)},
        ),
        (
            # This array has non-uniformly sized blocks
            "x=range(99); x[100::-3]",
            99,
            [20, 20, 20, 19, 20],
            slice(100, None, -3),
            {
                0: slice(-3, -21, -3),
                1: slice(-2, -21, -3),
                2: slice(-1, -21, -3),
                3: slice(-2, -20, -3),
                4: slice(-1, -21, -3),
            },
        ),
        (
            # This array has non-uniformly sized blocks
            "x=range(104); x[::-3]",
            104,
            [20, 23, 27, 13, 21],
            slice(None, None, -3),
            {
                0: slice(-1, -21, -3),
                1: slice(-3, -24, -3),
                2: slice(-3, -28, -3),
                3: slice(-1, -14, -3),
                4: slice(-1, -22, -3),
            },
        ),
        (
            # This array has non-uniformly sized blocks
            "x=range(104); x[:27:-3]",
            104,
            [20, 23, 27, 13, 21],
            slice(None, 27, -3),
            {
                1: slice(-3, -16, -3),
                2: slice(-3, -28, -3),
                3: slice(-1, -14, -3),
                4: slice(-1, -22, -3),
            },
        ),
        (
            # This array has non-uniformly sized blocks
            "x=range(104); x[100:27:-3]",
            104,
            [20, 23, 27, 13, 21],
            slice(100, 27, -3),
            {
                1: slice(-3, -16, -3),
                2: slice(-3, -28, -3),
                3: slice(-1, -14, -3),
                4: slice(-4, -22, -3),
            },
        ),
        (
            # This array is large
            "x=range(1000000000000); x[1000:]",
            1000000000000,
            [1000000000] * 1000,
            slice(1000, None, None),
            {
                0: slice(1000, 1000000000, 1),
                **{ii: slice(None, None, None) for ii in range(1, 1000)},
            },
        ),
    ]
    for label, dim_shape, lengths, index, expected in cases:
        result = _slice_1d(dim_shape, lengths, index)
        assert expected == result, label

    # x[:0]
    # Only non-emptiness is asserted: an empty selection must still produce
    # some plan entry. (The previous ``expected = {}`` was never compared and
    # contradicted this assertion, so it has been dropped.)
    result = _slice_1d(100, [20, 20, 20, 20, 20], slice(0))
    assert result
177
+
178
+
179
def test_slice_singleton_value_on_boundary():
    """Integer index 10 with chunks of length 5 maps to ``{2: 0}``."""
    layouts = [(15, [5, 5, 5]), (30, (5, 5, 5, 5, 5, 5))]
    for dim_shape, lengths in layouts:
        assert _slice_1d(dim_shape, lengths, 10) == {2: 0}
182
+
183
+
184
def test_mixed_index():
    """Indexing with an integer array, an int and two full slices gives a ``da.Array``."""
    source = da.ones((1, 1, 31, 40))
    key = (np.array([0]), 0, slice(None), slice(None))
    picked = source[key]
    assert isinstance(picked, da.Array)
    assert_eq(picked, source[0])
189
+
190
+
191
def test_slicing_and_chunks():
    """Slicing off the outer chunks on both axes leaves only the inner chunks."""
    layout = ((4, 8, 8, 4), (2, 6, 6, 2))
    trimmed = da.ones((24, 16), chunks=layout)[4:-4, 2:-2]
    assert trimmed.chunks == ((8, 8), (6, 6))
195
+
196
+
197
def test_slicing_and_unknown_chunks():
    """List indexing along an axis whose chunk sizes are nan raises ``ValueError``."""
    arr = da.ones((10, 5), chunks=5)
    # Overwrite the chunk metadata so the first axis reports unknown sizes.
    arr._chunks = ((np.nan, np.nan), (5,))
    message = "Array chunk size or shape is unknown"
    with pytest.raises(ValueError, match=message):
        arr[[0, 5]].compute()
202
+
203
+
204
def test_slicing_identities():
    """Every no-op index returns the very same array object."""
    a = da.ones((24, 16), chunks=((4, 8, 8, 4), (2, 6, 6, 2)))
    length = len(a)

    noop_keys = [
        slice(None),  # a[slice(None)]
        slice(None, None, None),  # a[:]
        slice(None, None, None),  # a[::]
        Ellipsis,  # a[...]
        slice(0, None, None),  # a[0:]
        slice(0, None, None),  # a[0::]
        slice(None, None, 1),  # a[::1]
        slice(0, length, None),  # a[0 : len(a)]
        slice(0, None, 1),  # a[0::1]
        slice(0, length, 1),  # a[0 : len(a) : 1]
    ]
    for key in noop_keys:
        assert a is a[key]
217
+
218
+
219
def test_slice_stop_0():
    """``[:0]`` on a single-chunk array matches NumPy's empty result (from gh-125)."""
    computed = da.ones(10, chunks=(10,))[:0].compute()
    reference = np.ones(10)[:0]
    assert_eq(computed, reference)
224
+
225
+
226
def test_slice_list_then_None():
    """A list take followed by ``[None]`` prepends a length-one axis."""
    zeros = da.zeros(shape=(5, 5), chunks=(3, 3))
    taken = zeros[[2, 1]]
    expanded = taken[None]

    assert_eq(expanded, np.zeros((1, 2, 5)))
231
+
232
+
233
class ReturnItem:
    """Helper whose subscript syntax returns the index object it was given."""

    def __getitem__(self, key):
        # Hand the key back untouched so ``I[1:5]`` evaluates to ``slice(1, 5)``.
        return key
236
+
237
+
238
@pytest.mark.skip(reason="really long test")
def test_slicing_exhaustively():
    """Compare dask and NumPy results over many combinations of indexers."""
    # ``Generator.random`` takes the shape as a single ``size`` argument;
    # passing ``6, 7, 8`` positionally would bind 7 to ``dtype`` and 8 to ``out``.
    x = np.random.default_rng().random((6, 7, 8))
    a = da.from_array(x, chunks=(3, 3, 3))
    I = ReturnItem()

    # independent indexing along different axes
    indexers = [0, -2, I[:], I[:5], [0, 1], [0, 1, 2], [4, 2], I[::-1], None, I[:0], []]
    for i in indexers:
        assert_eq(x[i], a[i])
        for j in indexers:
            assert_eq(x[i][:, j], a[i][:, j])
            assert_eq(x[:, i][j], a[:, i][j])
            for k in indexers:
                assert_eq(x[..., i][:, j][k], a[..., i][:, j][k])

    # repeated indexing along the first axis
    first_indexers = [I[:], I[:5], np.arange(5), [3, 1, 4, 5, 0], np.arange(6) < 6]
    second_indexers = [0, -1, 3, I[:], I[:3], I[2:-1], [2, 4], [], I[:0]]
    for i in first_indexers:
        for j in second_indexers:
            assert_eq(x[i][j], a[i][j])
260
+
261
+
262
def test_slicing_with_negative_step_flops_keys():
    """A reversed slice reads the source chunks in the opposite order."""
    x = da.arange(10, chunks=5)
    y = x[:1:-1]
    head_key = (y.name, 0)
    tail_key = (y.name, 1)

    # Output block 0 depends on the last input block and vice versa.
    assert (x.name, 1) in y.dask[head_key].dependencies
    assert (x.name, 0) in y.dask[tail_key].dependencies

    assert_eq(y, np.arange(10)[:1:-1])

    assert y.chunks == ((5, 3),)

    expected_head = Task(head_key, local_getitem, TaskRef((x.name, 1)), (slice(-1, -6, -1),))
    expected_tail = Task(tail_key, local_getitem, TaskRef((x.name, 0)), (slice(-1, -4, -1),))
    assert y.dask[head_key] == expected_head
    assert y.dask[tail_key] == expected_tail
274
+
275
+
276
def test_empty_slice():
    """``[:0]`` on a 2-D int32 array matches the NumPy result."""
    lazy = da.ones((5, 5), chunks=(2, 2), dtype="i4")
    emptied = lazy[:0]

    reference = np.ones((5, 5), dtype="i4")[:0]
    assert_eq(emptied, reference)
281
+
282
+
283
def test_multiple_list_slicing():
    """Two successive list takes on different axes agree with NumPy."""
    data = np.random.default_rng().random((6, 7, 8))
    lazy = da.from_array(data, chunks=(3, 3, 3))
    expected = data[:, [0, 1, 2]][[0, 1]]
    actual = lazy[:, [0, 1, 2]][[0, 1]]
    assert_eq(expected, actual)
287
+
288
+
289
def test_boolean_list_slicing():
    """Boolean lists index like NumPy; a length mismatch raises ``IndexError``."""
    for wrong_length in ([True], [False, False, False]):
        with pytest.raises(IndexError):
            da.asarray(range(2))[wrong_length]

    x = np.arange(5)
    ind = [True, False, False, False, True]
    assert_eq(da.asarray(x)[ind], x[ind])

    # https://github.com/dask/dask/issues/3706
    ind = [True]
    assert_eq(da.asarray([0])[ind], np.arange(1)[ind])
300
+
301
+
302
def test_boolean_numpy_array_slicing():
    """Boolean NumPy masks index like NumPy; a length mismatch raises ``IndexError``."""
    for wrong_length in ([True], [False, False, False]):
        with pytest.raises(IndexError):
            da.asarray(range(2))[np.array(wrong_length)]

    x = np.arange(5)
    ind = np.array([True, False, False, False, True])
    assert_eq(da.asarray(x)[ind], x[ind])

    # https://github.com/dask/dask/issues/3706
    ind = np.array([True])
    assert_eq(da.asarray([0])[ind], np.arange(1)[ind])
313
+
314
+
315
def test_empty_list():
    """An empty list index on any of the three axes agrees with NumPy."""
    reference = np.ones((5, 5, 5), dtype="i4")
    lazy = da.from_array(reference, chunks=2)

    # Empty list on axis 0, then axis 1, then axis 2.
    assert_eq(lazy[[], :3, :2], reference[[], :3, :2])
    assert_eq(lazy[:3, [], :2], reference[:3, [], :2])
    assert_eq(lazy[:3, :2, []], reference[:3, :2, []])
322
+
323
+
324
def test_uneven_chunks():
    """A step-2 slice over chunks of five yields alternating chunk sizes 3 and 2."""
    strided = da.ones(20, chunks=5)[::2]
    assert strided.chunks == ((3, 2, 3, 2),)
326
+
327
+
328
def test_new_blockdim():
    """``new_blockdim`` reports the chunk sizes left after a step-2 slice."""
    every_other = slice(0, None, 2)
    block_sizes = new_blockdim(20, [5, 5, 5, 5], every_other)
    assert block_sizes == [3, 2, 3, 2]
330
+
331
+
332
def test_slicing_consistent_names():
    """Equal or equivalent indexes produce graphs with the same keys."""
    x = np.arange(100).reshape((10, 10))
    a = da.from_array(x, chunks=(5, 5))

    # Each pair holds two index expressions expected to give identical keys.
    key_pairs = [
        (0, 0),
        ((slice(None), [1, 2, 3]), (slice(None), [1, 2, 3])),
        ((slice(None), slice(5, 2, -1)), (slice(None), slice(5, 2, -1))),
        ((0, Ellipsis), (0, Ellipsis)),
        (Ellipsis, Ellipsis),
        ([1, 3, 5], [1, 3, 5]),
        (slice(-11, 11), slice(None)),
        (slice(-11, -9), slice(None, 1)),
        (-1, 9),
        (slice(0, None, -1), slice(0, -11, -1)),
    ]
    for left, right in key_pairs:
        assert same_keys(a[left], a[right])
345
+
346
+
347
def test_slicing_consistent_names_after_normalization():
    """Different spellings of the full slice yield the same keys."""
    x = da.zeros(10, chunks=(5,))
    spellings = [
        (slice(0, None), slice(None, 10)),
        (slice(0, None), slice(0, 10)),
        (slice(0, None), slice(0, 10, 1)),
        (slice(None), slice(0, 10, 1)),
    ]
    for left, right in spellings:
        assert same_keys(x[left], x[right])
353
+
354
+
355
def test_sanitize_index_element():
    """A string is rejected by ``_sanitize_index_element`` with ``TypeError``."""
    not_an_index = "Hello!"
    with pytest.raises(TypeError):
        _sanitize_index_element(not_an_index)
358
+
359
+
360
def test_sanitize_index():
    """``sanitize_index`` rejects strings and accepts a pandas Series or a tuple."""
    pd = pytest.importorskip("pandas")
    with pytest.raises(TypeError):
        sanitize_index("Hello!")

    for sequence in (pd.Series([1, 2, 3]), (1, 2, 3)):
        np.testing.assert_equal(sanitize_index(sequence), [1, 2, 3])
367
+
368
+
369
def test_oob_check():
    """Out-of-bounds ints, lists and surplus axes all raise ``IndexError``."""
    x = da.ones(5, chunks=(2,))
    # ``(0, 0)`` is the key produced by ``x[0, 0]``: one index too many.
    bad_keys = (6, [6], -10, [-10], (0, 0))
    for key in bad_keys:
        with pytest.raises(IndexError):
            x[key]
381
+
382
+
383
@pytest.mark.parametrize("idx_chunks", [None, 3, 2, 1])
@pytest.mark.parametrize("x_chunks", [None, (3, 5), (2, 3), (1, 2), (1, 1)])
def test_index_with_int_dask_array(x_chunks, idx_chunks, request):
    """Index columns (and rows of the transpose) with an integer array.

    A ``None`` chunk parameter leaves that operand as a plain NumPy array.
    The data is crafted to stress these cases:
    - pick from different chunks of x out of order
    - a chunk of x contains no matches
    - only one chunk of x
    """
    data = np.array([[10, 20, 30, 40, 50], [60, 70, 80, 90, 100], [110, 120, 130, 140, 150]])
    positions = np.array([3, 0, 1])
    expect = np.array([[40, 10, 20], [90, 60, 70], [140, 110, 120]])

    x = data if x_chunks is None else da.from_array(data, chunks=x_chunks)
    idx = positions if idx_chunks is None else da.from_array(positions, chunks=idx_chunks)

    assert_eq(x[:, idx], expect)
    assert_eq(x.T[idx, :], expect.T)
401
+
402
+
403
@pytest.mark.parametrize("chunks", [1, 2, 3])
def test_index_with_int_dask_array_0d(chunks):
    """A 0-dimensional dask array used as an index behaves like the integer 1."""
    grid = da.from_array([[10, 20, 30], [40, 50, 60]], chunks=chunks)
    scalar_index = da.from_array(1, chunks=1)
    assert_eq(grid[scalar_index, :], grid[1, :])
    assert_eq(grid[:, scalar_index], grid[:, 1])
410
+
411
+
412
@pytest.mark.parametrize("chunks", [1, 2, 3, 4, 5])
def test_index_with_int_dask_array_nanchunks(chunks):
    """Index with the output of ``nonzero()``, an array with nan-sized chunks."""
    mixed = da.arange(-2, 3, chunks=chunks)
    assert_eq(mixed[mixed.nonzero()], np.array([-2, -1, 1, 2]))

    # Edge case: the nan-sized chunks resolve to size 0
    all_zero = da.zeros(5, chunks=chunks)
    assert_eq(all_zero[all_zero.nonzero()], np.array([]))
420
+
421
+
422
@pytest.mark.parametrize("chunks", [2, 4])
def test_index_with_int_dask_array_negindex(chunks):
    """Negative entries in a dask integer index count from the end."""
    values = da.arange(4, chunks=chunks)
    from_end = da.from_array([-1, -4], chunks=1)
    assert_eq(values[from_end], np.array([3, 0]))
427
+
428
+
429
@pytest.mark.parametrize("chunks", [2, 4])
def test_index_with_int_dask_array_indexerror(chunks):
    """Out-of-range entries in a dask integer index raise ``IndexError`` on compute."""
    a = da.arange(4, chunks=chunks)
    for out_of_range in ([4], [-5]):
        idx = da.from_array(out_of_range, chunks=1)
        with pytest.raises(IndexError):
            a[idx].compute()
438
+
439
+
440
@pytest.mark.parametrize("dtype", ["int8", "int16", "int32", "int64", "uint8", "uint16", "uint32", "uint64"])
def test_index_with_int_dask_array_dtypes(dtype):
    """A dask integer index works for each signed and unsigned width listed."""
    values = da.from_array([10, 20, 30, 40], chunks=-1)
    positions = np.array([1, 2]).astype(dtype)
    lazy_positions = da.from_array(positions, chunks=1)
    assert_eq(values[lazy_positions], np.array([20, 30]))
445
+
446
+
447
def test_index_with_bool_dask_array():
    """A boolean dask mask, alone or combined with a slice, agrees with NumPy."""
    x = np.arange(36).reshape((6, 6))
    d = da.from_array(x, chunks=(3, 3))
    mask = da.from_array(
        np.asarray([True, True, False, True, False, False], dtype=bool),
        chunks=2,
    )
    candidates = [mask, (slice(1, 9, 2), mask), (mask, slice(2, 8, 1))]
    for index in candidates:
        # Materialise any dask parts of the index so NumPy can use it.
        (concrete_index,) = dask.compute(index)
        assert_eq(x[concrete_index], d[index])
455
+
456
+
457
def test_index_with_bool_dask_array_2():
    """Place a boolean dask mask at each axis position in turn and compare with NumPy."""
    rng = np.random.default_rng()
    x = rng.random((10, 10, 10))
    ind = rng.random(10) > 0.5

    d = da.from_array(x, chunks=(3, 4, 5))
    dind = da.from_array(ind, chunks=4)

    base = [slice(1, 9, 1), slice(None)]

    for axis in range(x.ndim):
        # Same key for both libraries, except the mask is lazy on the dask side.
        dask_key = base[:axis] + [dind] + base[axis:]
        numpy_key = base[:axis] + [ind] + base[axis:]

        assert_eq(x[tuple(numpy_key)], d[tuple(dask_key)])
475
+
476
+
477
@pytest.mark.xfail(reason="tests internal graph optimization, not user behavior")
def test_cull():
    """A sliced array's graph should be smaller than the graph of its source."""
    source = da.ones(1000, chunks=(10,))

    selections = [1, slice(0, 30), slice(0, None, 100)]
    for selection in selections:
        sliced = source[selection]
        assert len(sliced.dask) < len(source.dask)
484
+
485
+
486
@pytest.mark.parametrize("shape", [(2,), (2, 3), (2, 3, 5)])
@pytest.mark.parametrize("index", [(Ellipsis,), (None, Ellipsis), (Ellipsis, None), (None, Ellipsis, None)])
def test_slicing_with_Nones(shape, index):
    """``None`` placed around an ``Ellipsis`` gives the same result as in NumPy."""
    reference = np.random.default_rng().random(shape)
    lazy = da.from_array(reference, chunks=shape)

    assert_eq(reference[index], lazy[index])
493
+
494
+
495
+ indexers = [Ellipsis, slice(2), 0, 1, -2, -1, slice(-2, None), None]
496
+
497
+
498
+ """
499
+ # We comment this out because it is 4096 tests
500
+ @pytest.mark.parametrize('a', indexers)
501
+ @pytest.mark.parametrize('b', indexers)
502
+ @pytest.mark.parametrize('c', indexers)
503
+ @pytest.mark.parametrize('d', indexers)
504
+ def test_slicing_none_int_ellipses(a, b, c, d):
505
+ if (a, b, c, d).count(Ellipsis) > 1:
506
+ return
507
+ shape = (2,3,5,7,11)
508
+ x = np.arange(np.prod(shape)).reshape(shape)
509
+ y = da.core.asarray(x)
510
+
511
+ xx = x[a, b, c, d]
512
+ yy = y[a, b, c, d]
513
+ assert_eq(xx, yy)
514
+ """
515
+
516
+
517
def test_slicing_integer_no_warnings():
    """Computing an integer-array take emits no warnings.

    See https://github.com/dask/dask/pull/2457/
    """
    rows = np.array([0, 0, 1, 1])
    X = da.random.default_rng().random(size=(100, 2), chunks=(2, 2))
    with warnings.catch_warnings(record=True) as caught:
        X[rows].compute()
    assert not caught
524
+
525
+
526
@pytest.mark.slow
def test_slicing_none_int_ellipes():
    """Compare dask and NumPy for every 4-tuple drawn from the module-level ``indexers``."""
    shape = (2, 3, 5, 7, 11)
    x = np.arange(np.prod(shape)).reshape(shape)
    y = da.core.asarray(x)
    for ind in itertools.product(indexers, repeat=4):
        # Combinations with more than one Ellipsis are skipped.
        if ind.count(Ellipsis) <= 1:
            assert_eq(x[ind], y[ind])
536
+
537
+
538
def test_None_overlap_int():
    """An index mixing an int, a slice, ``None`` and ``Ellipsis`` matches NumPy."""
    shape = (2, 3, 5, 7, 11)
    x = np.arange(np.prod(shape)).reshape(shape)
    y = da.core.asarray(x)

    # Same key that ``[0, :2, None, ...]`` would build.
    key = (0, slice(None, 2, None), None, Ellipsis)
    from_numpy = x[key]
    from_dask = y[key]
    assert_eq(from_numpy, from_dask)
547
+
548
+
549
def test_negative_n_slicing():
    """Index ``-2`` on a two-element array matches NumPy."""
    lazy_item = da.ones(2, chunks=2)[-2]
    numpy_item = np.ones(2)[-2]
    assert_eq(lazy_item, numpy_item)
551
+
552
+
553
def test_negative_list_slicing():
    """List indexes mixing positive and negative positions match NumPy."""
    reference = np.arange(5)
    lazy = da.from_array(reference, chunks=2)
    assert_eq(lazy[[0, -5]], reference[[0, -5]])
    assert_eq(lazy[[4, -1]], reference[[4, -1]])
558
+
559
+
560
def test_permit_oob_slices():
    """Slices whose bounds lie far outside the array behave as in NumPy."""
    x = np.arange(5)
    dx = da.from_array(x, chunks=2)

    oversized = [slice(-102, None), slice(102, None), slice(None, 102), slice(None, -102)]
    for window in oversized:
        assert_eq(x[window], dx[window])
568
+
569
+
570
def test_normalize_index():
    """``normalize_index`` expands ``Ellipsis`` and passes indexes through for a nan-length axis."""
    assert normalize_index((Ellipsis, None), (10,)) == (slice(None), None)

    unknown = (np.nan,)
    for scalar in (5, -5):
        assert normalize_index(scalar, unknown) == (scalar,)

    (as_array,) = normalize_index([-5, -2, 1], unknown)
    assert as_array.tolist() == [-5, -2, 1]

    assert normalize_index(slice(-5, -2), unknown) == (slice(-5, -2),)
577
+
578
+
579
def test_take_semi_sorted():
    """Taking ``arange(15) % 10`` from two chunks of five yields three chunks of five."""
    ones = da.ones(10, chunks=(5,))
    wrapped = np.arange(15) % 10

    taken = ones[wrapped]
    assert taken.chunks == ((5, 5, 5),)
585
+
586
+
587
def test_getitem_avoids_large_chunks():
    """Repeated-index takes stay correct and warning-free under a 0.1Mb chunk-size limit."""
    with dask.config.set({"array.chunk-size": "0.1Mb"}):
        a = np.arange(2 * 128 * 128, dtype="int64").reshape(2, 128, 128)
        indexer = [0] + [1] * 11
        expected = a[indexer]

        # Small (1, 8, 8) input chunks.
        small_chunked = da.from_array(a, chunks=(1, 8, 8))
        assert_eq(small_chunked[indexer], expected)

        # Large (1, 128, 128) input chunks.
        arr = da.from_array(a, chunks=(1, 128, 128))
        result = arr[indexer]
        assert_eq(result, expected)
        # Groups larger than input chunk size get split to avoid oversized outputs.
        split_chunks = ((1,) * 12, (128,), (128,))
        assert result.chunks == split_chunks

        # No warning is recorded with the split option set either way.
        for split_large in (False, True):
            with dask.config.set({"array.slicing.split-large-chunks": split_large}):
                with warnings.catch_warnings(record=True) as record:
                    result = arr[indexer]
                assert_eq(result, expected)
                assert not record
                if split_large:
                    assert result.chunks == split_chunks
618
+
619
+
620
def test_getitem_avoids_large_chunks_missing():
    """A repeated-index take still matches NumPy when two axes have unknown chunk sizes."""
    # We cannot apply the "avoid large chunks" optimization when
    # the chunks have unknown sizes.
    with dask.config.set({"array.chunk-size": "0.1Mb"}):
        a = np.arange(4 * 500 * 500).reshape(4, 500, 500)
        arr = da.from_array(a, chunks=(1, 500, 500))
        # Mark the trailing two axes as having unknown chunk sizes.
        arr._chunks = ((1, 1, 1, 1), (np.nan,), (np.nan,))
        indexer = [0, 1] + [2] * 100 + [3]
        taken = arr[indexer]
        assert_eq(taken, a[indexer])
631
+
632
+
633
def test_pathological_unsorted_slicing():
    """An index that hops across all chunks on every step still matches NumPy."""
    ones = da.ones(100, chunks=10)

    # [0, 10, 20, ... 90, 1, 11, 21, ... 91, ...]
    hopping = np.arange(100).reshape(10, 10).ravel(order="F")

    lazy_take = ones[hopping]
    assert_eq(lazy_take, ones.compute()[hopping])
640
+
641
+
642
@pytest.mark.parametrize("params", [(2, 2, 1), (5, 3, 2)])
def test_setitem_with_different_chunks_preserves_shape(params):
    """Reproducer for https://github.com/dask/dask/issues/3730.

    Assigning through a mask chunked differently from the target can bring new
    chunks into play. Those chunk sizes have to be applied to the mutated
    array, or it will not generate the correct keys.
    """
    size, target_chunks, mask_chunks = params
    target = da.zeros(size, chunks=target_chunks)
    mask = da.zeros(size, chunks=mask_chunks)
    target[mask] = 1
    computed = target.compute()
    assert target.shape == computed.shape
656
+
657
+
658
def test_gh3579():
    """The reversed slices ``[0::-1]`` and ``[::-1]`` match NumPy."""
    for reversed_slice in (slice(0, None, -1), slice(None, None, -1)):
        assert_eq(np.arange(10)[reversed_slice], da.arange(10, chunks=3)[reversed_slice])
661
+
662
+
663
def test_make_blockwise_sorted_slice():
    """Pin both arrays returned by ``make_block_sorted_slices`` for a shuffled index."""
    x = da.arange(8, chunks=4)
    shuffled = np.array([6, 0, 4, 2, 7, 1, 5, 3])

    first, second = make_block_sorted_slices(shuffled, x.chunks)

    np.testing.assert_array_equal(first, np.array([0, 2, 4, 6, 1, 3, 5, 7]))
    np.testing.assert_array_equal(second, np.array([3, 0, 2, 1, 7, 4, 6, 5]))
673
+
674
+
675
@pytest.mark.filterwarnings("ignore:Slicing:dask.array.core.PerformanceWarning")
@pytest.mark.parametrize("size, chunks", [((100, 2), (50, 2)), ((100, 2), (37, 1)), ((100,), (55,))])
def test_shuffle_slice(size, chunks):
    """``shuffle_slice`` gives the same result as plain fancy indexing."""
    x = da.random.default_rng().integers(0, 1000, size=size, chunks=chunks)
    length = len(x)

    # A random permutation of all positions.
    permutation = np.arange(length)
    np.random.default_rng().shuffle(permutation)
    assert_eq(x[permutation], shuffle_slice(x, permutation))

    # Every position shifted by one, with 0 moved to the end.
    rotated = np.roll(np.arange(length), -1)
    assert_eq(x[rotated], shuffle_slice(x, rotated))
692
+
693
+
694
def test_unknown_chunks_length_one():
    """List indexing works with one unknown-size chunk on an axis and fails with two."""
    flat = np.arange(256, dtype=int)
    lazy_flat = da.from_array(flat, chunks=(256,))
    ends = [0, -1]

    # Both the NumPy entry point (np.flatnonzero dispatches) and the dask one.
    for flatnonzero in (np.flatnonzero, da.flatnonzero):
        result = flatnonzero(lazy_flat)
        assert_eq(result[ends], np.flatnonzero(flat)[ends])

    grid = flat.reshape(16, 16)

    # One unknown-size chunk on the indexed axis.
    arr = da.from_array(grid, chunks=(8, 16))
    arr._chunks = ((8, 8), (np.nan,))
    assert_eq(arr[:, [0, -1]], grid[:, [0, -1]])

    # Two unknown-size chunks on the indexed axis.
    arr = da.from_array(grid, chunks=(8, 8))
    arr._chunks = ((8, 8), (np.nan, np.nan))
    with pytest.raises(ValueError, match="Array chunk size or shape"):
        arr[:, [0, -1]]
715
+
716
+
717
@pytest.mark.parametrize("lock", [True, False])
@pytest.mark.parametrize("asarray", [True, False])
@pytest.mark.parametrize("fancy", [True, False])
def test_gh4043(lock, asarray, fancy):
    """Stack two ``from_array`` results built with every lock/asarray/fancy combination."""
    options = {"chunks": 1, "asarray": asarray, "lock": lock, "fancy": fancy}
    zeros = da.from_array(np.zeros(3), **options)
    ones = da.from_array(np.ones(3), **options)
    stacked = da.stack([zeros, ones])
    assert_eq(stacked, stacked)
725
+
726
+
727
def test_slice_array_3d_with_bool_numpy_array():
    """A full-shape 3-D NumPy boolean mask selects the expected values.

    See https://github.com/dask/dask/issues/6089
    """
    shape = (4, 3, 2)
    lazy = da.arange(0, 24).reshape(shape)
    mask = np.arange(0, 24).reshape(shape) > 12

    selected = lazy[mask].compute()
    assert_eq(selected, np.arange(13, 24))
735
+
736
+
737
def test_slice_masked_arrays():
    """List indexing a dask array built from a masked array matches NumPy."""
    masked = np.ma.array(range(8), mask=[0, 0, 1, 0, 0, 1, 0, 1])
    lazy = da.from_array(masked, chunks=(4, 4))
    assert_eq(lazy[[2, 6]], masked[[2, 6]])
741
+
742
+
743
def test_slice_array_null_dimension():
    """List indexing works on an array with a zero-length axis."""
    lazy = da.from_array(np.zeros((3, 0)))
    reference = np.zeros((3, 0))[[0]]
    assert_eq(lazy[[0]], reference)
747
+
748
+
749
def test_take_sorted_indexer():
    """Taking every row in order adds only ``Alias`` entries to the source graph."""
    arr = da.ones((250, 100), chunks=((50, 100, 33, 67), 100))
    result = arr[list(range(0, 250)), :]
    assert_eq(arr, result)

    source_graph = dict(arr.dask)
    getitem_keys = [key for key in dict(result.dask) if "getitem" in key[0]]
    # Pair each getitem key, in order, with a key of the source graph.
    aliases = {key: Alias(key, target) for key, target in zip(getitem_keys, source_graph.keys())}
    assert {**source_graph, **aliases} == dict(result.dask)
764
+
765
+
766
def test_all_none_slices_just_mappings():
    """A slice that keeps whole chunks produces only ``Alias`` getitem entries."""
    arr = da.ones((10, 10), chunks=(1, 5))
    result = arr[slice(None, 6), slice(None)]
    getitem_entries = {key: value for key, value in dict(result.dask).items() if "getitem" in key[0]}
    assert len(getitem_entries) == 12
    # check that we are just mapping the keys
    assert all(isinstance(value, Alias) for value in getitem_entries.values())
    assert_eq(result, np.ones((6, 10)))
774
+
775
+
776
def test_minimal_dtype_doesnt_overflow():
    """A boolean mask selecting positions 1560-1859 of a 1980-element array matches NumPy."""
    x = np.arange(1980)
    # Build the array with the package under test (``da`` is ``dask_array``).
    # The previous ``dask.array.from_array`` exercised a different package and
    # relied on ``dask.array`` having been imported as a side effect.
    dx = da.from_array(x, chunks=[248])
    ib = np.zeros(1980, dtype=bool)
    ib[1560:1860] = True
    assert_eq(dx[ib], x[ib])
782
+
783
+
784
def test_vindex_with_dask_array():
    """``vindex`` accepts a 2-D dask integer indexer on a single-chunk 1-D array.

    The three unsupported uses below must raise ``IndexError``.
    """
    arr = np.array([0.2, 0.4, 0.6])
    darr = da.from_array(arr, chunks=-1)

    # Use the Generator API like the rest of this module instead of the
    # legacy ``np.random.randint``.
    indexer = np.random.default_rng().integers(0, 3, 8).reshape(4, 2).astype(int)
    dindexer = da.from_array(indexer, chunks=(2, 2))
    assert_eq(darr.vindex[dindexer], arr[indexer])

    msg = "vindex does not support indexing"

    with pytest.raises(IndexError, match=msg):
        darr.rechunk((1, 1)).vindex[dindexer]

    with pytest.raises(IndexError, match=msg):
        darr.reshape((3, 1)).vindex[dindexer]

    with pytest.raises(IndexError, match=msg):
        darr.vindex[(dindexer, None)]
802
+
803
+
804
def test_positional_indexer_newaxis():
    """A boolean list combined with ``np.newaxis`` matches the NumPy result."""
    lazy = da.array([0, 1, 2])
    with_axis = lazy[[True, True, False], np.newaxis]
    reference = lazy.compute()[[True, True, False], np.newaxis]
    assert_eq(with_axis, reference)
808
+
809
+
810
@pytest.mark.parametrize(
    "shapes",
    [
        (10, 10),
        (np.nan, np.nan),
        (10, np.nan),
        (np.nan, 10),
    ],
)
def test_boolean_mask_with_unknown_shape(
    shapes: tuple[float | int, float | int],
) -> None:
    """Assign through a boolean mask when the array or mask length may be nan.

    ``shapes`` holds the declared per-piece length of the target and of the
    mask. Each is built by concatenating two delayed ten-element pieces.
    """
    x_shape, mask_shape = shapes
    arr = delayed(np.ones(10))
    x = da.concatenate([da.from_delayed(arr, shape=(x_shape,), dtype=float) for _ in range(2)])
    mask = da.concatenate([da.from_delayed(arr, shape=(mask_shape,), dtype=bool) for _ in range(2)])
    x[mask] = 2

    assert_eq(x, np.full(20, 2.0))