dask-array 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144) hide show
  1. dask_array/__init__.py +228 -0
  2. dask_array/_backends.py +76 -0
  3. dask_array/_backends_array.py +99 -0
  4. dask_array/_blockwise.py +1410 -0
  5. dask_array/_broadcast.py +272 -0
  6. dask_array/_chunk.py +445 -0
  7. dask_array/_chunk_types.py +54 -0
  8. dask_array/_collection.py +1644 -0
  9. dask_array/_concatenate.py +331 -0
  10. dask_array/_core_utils.py +1365 -0
  11. dask_array/_dispatch.py +141 -0
  12. dask_array/_einsum.py +277 -0
  13. dask_array/_expr.py +544 -0
  14. dask_array/_expr_flow.py +586 -0
  15. dask_array/_gufunc.py +805 -0
  16. dask_array/_histogram.py +617 -0
  17. dask_array/_map_blocks.py +652 -0
  18. dask_array/_new_collection.py +10 -0
  19. dask_array/_numpy_compat.py +135 -0
  20. dask_array/_overlap.py +1159 -0
  21. dask_array/_rechunk.py +1050 -0
  22. dask_array/_reshape.py +710 -0
  23. dask_array/_routines.py +102 -0
  24. dask_array/_shuffle.py +448 -0
  25. dask_array/_stack.py +264 -0
  26. dask_array/_svg.py +291 -0
  27. dask_array/_templates.py +29 -0
  28. dask_array/_test_utils.py +257 -0
  29. dask_array/_ufunc.py +385 -0
  30. dask_array/_utils.py +349 -0
  31. dask_array/_visualize.py +223 -0
  32. dask_array/_xarray.py +337 -0
  33. dask_array/core/__init__.py +34 -0
  34. dask_array/core/_blockwise_funcs.py +312 -0
  35. dask_array/core/_conversion.py +422 -0
  36. dask_array/core/_from_graph.py +97 -0
  37. dask_array/creation/__init__.py +71 -0
  38. dask_array/creation/_arange.py +121 -0
  39. dask_array/creation/_diag.py +116 -0
  40. dask_array/creation/_diagonal.py +241 -0
  41. dask_array/creation/_eye.py +103 -0
  42. dask_array/creation/_linspace.py +102 -0
  43. dask_array/creation/_mesh.py +134 -0
  44. dask_array/creation/_ones_zeros.py +454 -0
  45. dask_array/creation/_pad.py +270 -0
  46. dask_array/creation/_repeat.py +55 -0
  47. dask_array/creation/_tile.py +36 -0
  48. dask_array/creation/_tri.py +28 -0
  49. dask_array/creation/_utils.py +296 -0
  50. dask_array/fft.py +320 -0
  51. dask_array/io/__init__.py +39 -0
  52. dask_array/io/_base.py +10 -0
  53. dask_array/io/_from_array.py +257 -0
  54. dask_array/io/_from_delayed.py +95 -0
  55. dask_array/io/_from_graph.py +54 -0
  56. dask_array/io/_from_npy_stack.py +67 -0
  57. dask_array/io/_store.py +336 -0
  58. dask_array/io/_tiledb.py +159 -0
  59. dask_array/io/_to_npy_stack.py +65 -0
  60. dask_array/io/_zarr.py +449 -0
  61. dask_array/linalg/__init__.py +39 -0
  62. dask_array/linalg/_cholesky.py +234 -0
  63. dask_array/linalg/_lu.py +300 -0
  64. dask_array/linalg/_norm.py +94 -0
  65. dask_array/linalg/_qr.py +601 -0
  66. dask_array/linalg/_solve.py +349 -0
  67. dask_array/linalg/_svd.py +394 -0
  68. dask_array/linalg/_tensordot.py +334 -0
  69. dask_array/linalg/_utils.py +74 -0
  70. dask_array/manipulation/__init__.py +45 -0
  71. dask_array/manipulation/_expand.py +321 -0
  72. dask_array/manipulation/_flip.py +92 -0
  73. dask_array/manipulation/_roll.py +78 -0
  74. dask_array/manipulation/_transpose.py +309 -0
  75. dask_array/random/__init__.py +125 -0
  76. dask_array/random/_choice.py +181 -0
  77. dask_array/random/_expr.py +256 -0
  78. dask_array/random/_generator.py +441 -0
  79. dask_array/random/_random_state.py +259 -0
  80. dask_array/random/_utils.py +84 -0
  81. dask_array/reductions/__init__.py +84 -0
  82. dask_array/reductions/_arg_reduction.py +130 -0
  83. dask_array/reductions/_common.py +1082 -0
  84. dask_array/reductions/_cumulative.py +522 -0
  85. dask_array/reductions/_percentile.py +261 -0
  86. dask_array/reductions/_reduction.py +725 -0
  87. dask_array/reductions/_trace.py +56 -0
  88. dask_array/routines/__init__.py +133 -0
  89. dask_array/routines/_apply.py +84 -0
  90. dask_array/routines/_bincount.py +112 -0
  91. dask_array/routines/_broadcast.py +111 -0
  92. dask_array/routines/_coarsen.py +115 -0
  93. dask_array/routines/_diff.py +79 -0
  94. dask_array/routines/_gradient.py +158 -0
  95. dask_array/routines/_indexing.py +65 -0
  96. dask_array/routines/_insert_delete.py +132 -0
  97. dask_array/routines/_misc.py +122 -0
  98. dask_array/routines/_nonzero.py +72 -0
  99. dask_array/routines/_search.py +123 -0
  100. dask_array/routines/_select.py +113 -0
  101. dask_array/routines/_statistics.py +171 -0
  102. dask_array/routines/_topk.py +82 -0
  103. dask_array/routines/_triangular.py +74 -0
  104. dask_array/routines/_unique.py +232 -0
  105. dask_array/routines/_where.py +62 -0
  106. dask_array/slicing/__init__.py +67 -0
  107. dask_array/slicing/_basic.py +550 -0
  108. dask_array/slicing/_blocks.py +138 -0
  109. dask_array/slicing/_bool_index.py +145 -0
  110. dask_array/slicing/_setitem.py +329 -0
  111. dask_array/slicing/_squeeze.py +101 -0
  112. dask_array/slicing/_utils.py +1133 -0
  113. dask_array/slicing/_vindex.py +282 -0
  114. dask_array/stacking/__init__.py +15 -0
  115. dask_array/stacking/_block.py +83 -0
  116. dask_array/stacking/_simple.py +58 -0
  117. dask_array/templates/array.html.j2 +48 -0
  118. dask_array/tests/__init__.py +0 -0
  119. dask_array/tests/conftest.py +22 -0
  120. dask_array/tests/test_api.py +40 -0
  121. dask_array/tests/test_binary_op_chunks.py +107 -0
  122. dask_array/tests/test_coarse_slice_through_blockwise.py +362 -0
  123. dask_array/tests/test_collection.py +799 -0
  124. dask_array/tests/test_creation.py +1102 -0
  125. dask_array/tests/test_expr_flow.py +143 -0
  126. dask_array/tests/test_linalg.py +1130 -0
  127. dask_array/tests/test_map_blocks_multi_output.py +104 -0
  128. dask_array/tests/test_rechunk_pushdown.py +214 -0
  129. dask_array/tests/test_reductions.py +1091 -0
  130. dask_array/tests/test_routines.py +2853 -0
  131. dask_array/tests/test_shuffle_chunks.py +67 -0
  132. dask_array/tests/test_slice_pushdown.py +968 -0
  133. dask_array/tests/test_slice_through_blockwise.py +678 -0
  134. dask_array/tests/test_slice_through_overlap.py +366 -0
  135. dask_array/tests/test_slice_through_reshape.py +272 -0
  136. dask_array/tests/test_slicing.py +839 -0
  137. dask_array/tests/test_transpose_slice_pushdown.py +208 -0
  138. dask_array/tests/test_visualize.py +94 -0
  139. dask_array/tests/test_xarray.py +193 -0
  140. dask_array-0.1.0.dist-info/METADATA +48 -0
  141. dask_array-0.1.0.dist-info/RECORD +144 -0
  142. dask_array-0.1.0.dist-info/WHEEL +4 -0
  143. dask_array-0.1.0.dist-info/entry_points.txt +2 -0
  144. dask_array-0.1.0.dist-info/licenses/LICENSE +29 -0
@@ -0,0 +1,2853 @@
1
+ from __future__ import annotations
2
+
3
+ import contextlib
4
+ import itertools
5
+ import pickle
6
+ import sys
7
+ import warnings
8
+ from numbers import Number
9
+
10
+ import pytest
11
+
12
+ import dask
13
+ from dask.delayed import delayed
14
+
15
+ np = pytest.importorskip("numpy")
16
+
17
+ import dask_array as da
18
+ from dask_array._numpy_compat import NUMPY_GE_200, NUMPY_GE_220, AxisError
19
+ from dask_array._test_utils import allclose, assert_eq, same_keys
20
+
21
+
22
def test_array():
    """``da.array`` applies ``ndmin``/``dtype`` like NumPy and accepts dask input."""
    np_ones = np.ones(5, dtype="i4")
    dask_ones = da.ones(5, chunks=3, dtype="i4")
    expected = np.array(np_ones, ndmin=3, dtype="i8")
    assert_eq(da.array(dask_ones, ndmin=3, dtype="i8"), expected)

    # Regression #1847: wrapping an existing dask array must not raise.
    wrapped = da.array(da.ones((100, 3), chunks=10))
    assert isinstance(wrapped, da.Array)
31
+
32
+
33
def test_array_return_type():
    """Regression test for https://github.com/dask/dask/issues/5426."""
    values = [0, 1, 2, 3]
    result = da.array(values)
    assert isinstance(result, da.Array)
    assert_eq(values, result)
39
+
40
+
41
def test_derived_docstrings():
    """The ``array`` docstring carries the copy notice and NumPy's summary line."""
    doc = da.routines.array.__doc__
    for fragment in (
        "This docstring was copied from numpy.array",
        "Create an array.",
    ):
        assert fragment in doc
44
+
45
+
46
@pytest.mark.parametrize("funcname", ["atleast_1d", "atleast_2d", "atleast_3d"])
def test_atleast_nd_no_args(funcname):
    """Calling ``atleast_*d`` with no arguments returns the same value as NumPy."""
    expected = getattr(np, funcname)()
    actual = getattr(da, funcname)()

    assert expected == actual
55
+
56
+
57
@pytest.mark.parametrize("funcname", ["atleast_1d", "atleast_2d", "atleast_3d"])
@pytest.mark.parametrize(
    "shape, chunks",
    [
        (tuple(), tuple()),
        ((4,), (2,)),
        ((4, 6), (2, 3)),
        ((4, 6, 8), (2, 3, 4)),
        ((4, 6, 8, 10), (2, 3, 4, 5)),
    ],
)
def test_atleast_nd_one_arg(funcname, shape, chunks):
    """``atleast_*d`` on a single array matches NumPy for 0-d through 4-d input."""
    source = np.random.default_rng().random(shape)
    chunked = da.from_array(source, chunks=chunks)

    expected = getattr(np, funcname)(source)
    actual = getattr(da, funcname)(chunked)

    assert_eq(expected, actual)
79
+
80
+
81
@pytest.mark.parametrize("funcname", ["atleast_1d", "atleast_2d", "atleast_3d"])
@pytest.mark.parametrize(
    "shape1, shape2",
    list(itertools.combinations_with_replacement([tuple(), (4,), (4, 6), (4, 6, 8), (4, 6, 8, 10)], 2)),
)
def test_atleast_nd_two_args(funcname, shape1, shape2):
    """``atleast_*d`` with two arrays returns the same container type and values as NumPy."""
    np_inputs = []
    da_inputs = []
    for shape in (shape1, shape2):
        sample = np.random.default_rng().random(shape)
        np_inputs.append(sample)
        # Chunk every dimension at half its length.
        da_inputs.append(da.from_array(sample, chunks=tuple(dim // 2 for dim in shape)))

    np_results = getattr(np, funcname)(*np_inputs)
    da_results = getattr(da, funcname)(*da_inputs)

    assert type(np_results) is type(da_results)
    assert len(np_results) == len(da_results)

    for expected, actual in zip(np_results, da_results):
        assert_eq(expected, actual)
108
+
109
+
110
def test_transpose():
    """``transpose`` accepts a tuple or varargs, is deterministic, and rejects short axes."""
    x = np.arange(240).reshape((4, 6, 10))
    d = da.from_array(x, (2, 3, 4))

    # The same permutation, passed first as one tuple and then as varargs.
    for args in [((2, 0, 1),), (2, 0, 1)]:
        assert_eq(d.transpose(*args), x.transpose(*args))
        assert same_keys(d.transpose(*args), d.transpose(*args))

    # Too few axes must be rejected in both calling conventions.
    for bad_args in [(1, 2), ((1, 2),)]:
        with pytest.raises(ValueError):
            d.transpose(*bad_args)
125
+
126
+
127
def test_transpose_negative_axes():
    """Negative axis indices in ``transpose`` behave as they do in NumPy."""
    shape = (2, 3, 4, 5)
    order = [-1, -2, 0, 1]
    expected = np.ones(shape).transpose(order)
    actual = da.ones(shape, chunks=3).transpose(order)

    assert_eq(expected, actual)
132
+
133
+
134
def test_transpose_skip_when_possible():
    """An identity permutation returns the very same array object."""
    x = da.ones((2, 3, 4), chunks=3)
    for identity_axes in [(0, 1, 2), (-3, -2, -1)]:
        assert x.transpose(identity_axes) is x
138
+
139
+
140
def test_swapaxes():
    """``swapaxes`` matches NumPy in function and method form; names are deterministic."""
    x = np.random.default_rng().normal(0, 10, size=(10, 12, 7))
    d = da.from_array(x, chunks=(4, 5, 2))

    # Function form.
    for first, second in [(0, 1), (2, 1)]:
        assert_eq(np.swapaxes(x, first, second), da.swapaxes(d, first, second))

    # Method form, including a no-op swap and negative axes.
    for first, second in [(2, 1), (0, 0), (1, 2), (0, -1), (-1, 1)]:
        assert_eq(x.swapaxes(first, second), d.swapaxes(first, second))

    assert d.swapaxes(0, 1).name == d.swapaxes(0, 1).name
    # Note: swapaxes(0, 1) and swapaxes(1, 0) are mathematically equivalent,
    # so implementations that normalize may give them the same name; only a
    # genuinely different swap is required to produce a different name.
    assert d.swapaxes(0, 1).name != d.swapaxes(0, 2).name
156
+
157
+
158
@pytest.mark.parametrize("funcname", ["moveaxis", "rollaxis"])
@pytest.mark.parametrize("shape", [(), (5,), (3, 5, 7, 3)])
def test_moveaxis_rollaxis(funcname, shape):
    """``moveaxis``/``rollaxis`` match NumPy for every pair of valid axes."""
    x = np.random.default_rng().random(shape)
    d = da.from_array(x, chunks=((2,) * len(shape)))
    np_func = getattr(np, funcname)
    da_func = getattr(da, funcname)

    valid_axes = range(-x.ndim, x.ndim)
    for first in valid_axes:
        assert isinstance(da_func(d, 0, first), da.Array)
        for second in valid_axes:
            assert_eq(np_func(x, first, second), da_func(d, first, second))
169
+
170
+
171
def test_moveaxis_rollaxis_keyword():
    """Keyword arguments and the default ``start`` of ``rollaxis`` are supported."""
    x = np.random.default_rng().random((10, 12, 7))
    d = da.from_array(x, chunks=(4, 5, 2))

    moved_np = np.moveaxis(x, destination=1, source=0)
    moved_da = da.moveaxis(d, destination=1, source=0)
    assert_eq(moved_np, moved_da)

    assert_eq(np.rollaxis(x, 2), da.rollaxis(d, 2))
    assert isinstance(da.rollaxis(d, 1), da.Array)

    rolled_np = np.rollaxis(x, start=1, axis=2)
    rolled_da = da.rollaxis(d, start=1, axis=2)
    assert_eq(rolled_np, rolled_da)
178
+
179
+
180
def test_moveaxis_rollaxis_numpy_api():
    """Calling NumPy's ``moveaxis``/``rollaxis`` on a dask array yields a dask array."""
    a = da.random.default_rng().random((4, 4, 4), chunks=2)

    for np_func in (np.moveaxis, np.rollaxis):
        result = np_func(a, 2, 0)
        assert isinstance(result, da.Array)
        assert_eq(result, np_func(a.compute(), 2, 0))
189
+
190
+
191
@pytest.mark.parametrize(
    "funcname, kwargs",
    [
        ("flipud", {}),
        ("fliplr", {}),
        ("flip", {}),
        ("flip", {"axis": 0}),
        ("flip", {"axis": 1}),
        ("flip", {"axis": 2}),
        ("flip", {"axis": -1}),
        ("flip", {"axis": (0, 2)}),
    ],
)
@pytest.mark.parametrize("shape", [tuple(), (4,), (4, 6), (4, 6, 8), (4, 6, 8, 10)])
def test_flip(funcname, kwargs, shape):
    """Flip functions match NumPy, and raise ``ValueError`` for out-of-range axes."""
    # Work out which axes the call will touch so validity can be predicted.
    axis = kwargs.get("axis")
    if axis is None:
        implied_axes = {
            "flipud": (0,),
            "fliplr": (1,),
            "flip": range(len(shape)),
        }
        axis = implied_axes.get(funcname)
    elif not isinstance(axis, tuple):
        axis = (axis,)

    np_a = np.random.default_rng().random(shape)
    da_a = da.from_array(np_a, chunks=1)

    np_func = getattr(np, funcname)
    da_func = getattr(da, funcname)

    # An axis is usable only if it indexes into range(ndim), negatives included.
    ndim = np_a.ndim
    has_invalid_axis = any(not (-ndim <= ax < ndim) for ax in axis)

    if has_invalid_axis:
        with pytest.raises(ValueError):
            da_func(da_a, **kwargs)
    else:
        expected = np_func(np_a, **kwargs)
        actual = da_func(da_a, **kwargs)

        assert_eq(expected, actual)
234
+
235
+
236
@pytest.mark.parametrize(
    "kwargs",
    [
        {},
        {"axes": (1, 0)},
        {"axes": (2, 3)},
        {"axes": (0, 1, 2)},
    ],
)
@pytest.mark.parametrize(
    "shape",
    [
        tuple(),
        (4,),
        (4, 6),
        (4, 6, 8),
    ],
)
def test_rot90(kwargs, shape):
    """``rot90`` matches NumPy for many ``k`` values and rejects invalid ``axes``."""
    axes = kwargs.get("axes", (0, 1))
    np_a = np.random.default_rng().random(shape)
    da_a = da.from_array(np_a, chunks=2)

    # ``axes`` is invalid when one of its first two entries is out of range,
    # when it does not name exactly two axes, or when both axes coincide.
    ndim = np_a.ndim
    out_of_range = any(not (-ndim <= ax < ndim) for ax in axes[:2])
    if out_of_range or len(axes) != 2 or axes[0] == axes[1]:
        with pytest.raises(ValueError):
            da.rot90(da_a, **kwargs)
        return

    for k in range(-3, 9):
        expected = np.rot90(np_a, k=k, **kwargs)
        actual = da.rot90(da_a, k=k, **kwargs)
        assert_eq(expected, actual)
277
+
278
+
279
@pytest.mark.parametrize(
    "x_shape, y_shape, x_chunks, y_chunks",
    [
        [(), (), (), ()],
        [(), (7,), (), ()],
        [(), (7, 11), (), ()],
        [(), (7, 11, 15), (), ()],
        [(), (7, 11, 15, 19), (), ()],
        [(7,), (), (), ()],
        [(7,), (7,), (), ()],
        [(11,), (11, 7), (), ()],
        [(15,), (7, 15, 11), (), ()],
        [(19,), (7, 11, 19, 15), (), ()],
        [(7, 11), (), (), ()],
        [(7, 11), (11,), (), ()],
        [(7, 11), (11, 7), (), ()],
        [(11, 15), (7, 15, 11), (), ()],
        [(15, 19), (7, 11, 19, 15), (), ()],
        [(7, 11, 15), (), (), ()],
        [(7, 11, 15), (15,), (), ()],
        [(7, 11, 15), (15, 7), (), ()],
        [(7, 11, 15), (7, 15, 11), (), ()],
        [(11, 15, 19), (7, 11, 19, 15), (), ()],
        [(7, 11, 15, 19), (), (), ()],
        [(7, 11, 15, 19), (19,), (), ()],
        [(7, 11, 15, 19), (19, 7), (), ()],
        [(7, 11, 15, 19), (11, 19, 13), (), ()],
        [(7, 11, 15, 19), (7, 11, 19, 15), (), ()],
        # These tests use explicitly special/disparate chunk sizes:
        [(), (7,), (), (5,)],
        [(), (7, 11, 15, 19), (), (1, 3, 5, 19)],
        [(7, 11), (11, 7), (1, 1), (1, 1)],
        [(7, 11), (11, 7), (3, 5), (4, 2)],
        [(7, 11), (11, 7), (7, 11), (11, 7)],
        [(11, 15, 19), (7, 11, 19, 15), (7, 7, 7), (3, 9, 9, 9)],
        [(3, 3, 20, 30), (3, 3, 30, 20), (1, 3, 2, 6), (1, 3, 5, 10)],
    ],
)
def test_matmul(x_shape, y_shape, x_chunks, y_chunks):
    """``da.matmul`` matches ``np.matmul`` for every dask/NumPy operand mix.

    When either operand is 0-d, ``da.matmul`` must raise ``ValueError``.
    Empty chunk specs in the parametrization mean "half of each dimension".
    """
    rng = np.random.default_rng(3732)

    # ``[()]`` is applied so the 0-d cases are indexed down before use.
    x = rng.random(x_shape)[()]
    y = rng.random(y_shape)[()]

    half_x = tuple((size // 2) for size in x.shape)
    half_y = tuple((size // 2) for size in y.shape)
    a = da.from_array(x, chunks=x_chunks or half_x)
    b = da.from_array(y, chunks=y_chunks or half_y)

    # NumPy rejects the 0-d cases with ValueError; ``expected`` stays None then.
    expected = None
    with contextlib.suppress(ValueError):
        expected = np.matmul(x, y)

    has_scalar_operand = x.ndim == 0 or y.ndim == 0
    for lhs, rhs in itertools.product([a, x], [b, y]):
        if has_scalar_operand:
            with pytest.raises(ValueError):
                da.matmul(lhs, rhs)
        else:
            assert_eq(expected, da.matmul(lhs, rhs))
338
+
339
+
340
def test_tensordot():
    """``tensordot`` matches NumPy for dask/NumPy operand mixes and names deterministically."""
    x = np.arange(400).reshape((20, 20))
    a = da.from_array(x, chunks=(5, 4))
    y = np.arange(200).reshape((20, 10))
    b = da.from_array(y, chunks=(4, 5))

    for axes in [1, (1, 0), (-1, 0)]:
        # dask x dask, NumPy x dask, dask x NumPy
        for lhs, rhs in [(a, b), (x, b), (a, y)]:
            assert_eq(da.tensordot(lhs, rhs, axes=axes), np.tensordot(x, y, axes=axes))

    first = da.tensordot(a, b, axes=(1, 0))
    second = da.tensordot(a, b, axes=(1, 0))
    assert same_keys(first, second)

    # Increasing number of chunks warning
    with pytest.warns(da.PerformanceWarning):
        assert not same_keys(da.tensordot(a, b, axes=0), da.tensordot(a, b, axes=1))
356
+
357
+
358
@pytest.mark.parametrize("axes", [0, 1, (0, 1), (1, 0), ((1, 0), (2, 1)), ((1, 2), (2, 0)), ((2, 0), (1, 2))])
def test_tensordot_2(axes):
    """``tensordot`` of a 3-d array with itself matches NumPy for several ``axes`` forms."""
    cube = np.arange(4 * 4 * 4).reshape((4, 4, 4))
    dcube = da.from_array(cube, chunks=2)

    expected = np.tensordot(cube, cube, axes=axes)
    assert_eq(da.tensordot(dcube, dcube, axes=axes), expected)
364
+
365
+
366
@pytest.mark.parametrize("chunks", ["auto", (4, 6), (2, 3), (4, 3), (2, 6)])
def test_tensordot_double_contraction_neq2(chunks):
    """Regression test for https://github.com/dask/dask/issues/5472 (2-d input)."""
    matrix = np.arange(24).reshape(4, 6)
    dmatrix = da.from_array(matrix, chunks=chunks)

    expected = np.tensordot(matrix, matrix, axes=2)
    assert_eq(da.tensordot(dmatrix, dmatrix, axes=2), expected)
372
+
373
+
374
def test_tensordot_double_contraction_ngt2():
    """Regression test for https://github.com/dask/dask/issues/5472 (3-d input)."""
    # First pass: default chunking on both operands.
    # Second pass: explicit chunking on the left operand only.
    for left_kwargs in ({}, {"chunks": 3}):
        x = np.arange(60.0).reshape(3, 4, 5)
        y = np.arange(60.0).reshape(4, 5, 3)
        u = da.from_array(x, **left_kwargs)
        v = da.from_array(y)

        assert_eq(da.tensordot(u, v, axes=2), np.tensordot(x, y, axes=2))
389
+
390
+
391
def test_tensordot_more_than_26_dims():
    """A full contraction over 27 dimensions works."""
    ndim = 27
    ones = np.broadcast_to(1, [2] * ndim)
    dones = da.from_array(ones, chunks=-1)

    # Contracting all-ones arrays over every axis sums 2**ndim ones.
    expected = np.array(2**ndim)
    assert_eq(da.tensordot(dones, dones, ndim), expected)
396
+
397
+
398
def test_dot_method():
    """The ``Array.dot`` method matches ``ndarray.dot``."""
    left = np.arange(400).reshape((20, 20))
    right = np.arange(200).reshape((20, 10))
    dleft = da.from_array(left, chunks=(5, 5))
    dright = da.from_array(right, chunks=(5, 5))

    assert_eq(dleft.dot(dright), left.dot(right))
405
+
406
+
407
def test_dot_persist_equivalence():
    """Regression test for https://github.com/dask/dask/issues/6907."""
    lazy = da.random.default_rng().random((4, 4), chunks=(2, 2))
    lazy[lazy < 0.65] = 0
    persisted = lazy.persist()
    concrete = lazy.compute()

    from_lazy = da.dot(lazy, lazy).compute()
    from_persisted = da.dot(persisted, persisted).compute()
    reference = np.dot(concrete, concrete)

    assert np.allclose(reference, from_lazy)
    assert np.allclose(reference, from_persisted)
418
+
419
+
420
@pytest.mark.parametrize("shape, chunks", [((20,), (6,)), ((4, 5), (2, 3))])
def test_vdot(shape, chunks):
    """``vdot`` on complex input matches NumPy and conjugates when operands swap."""
    rng = np.random.default_rng(1337)

    def complex_sample():
        # Draw real and imaginary parts together, each in [-1, 1).
        parts = 2 * rng.random((2,) + shape) - 1
        return parts[0] + 1j * parts[1]

    x = complex_sample()
    y = complex_sample()

    a = da.from_array(x, chunks=chunks)
    b = da.from_array(y, chunks=chunks)

    assert_eq(np.vdot(x, y), da.vdot(a, b))
    assert_eq(np.vdot(y, x), da.vdot(b, a))
    assert_eq(da.vdot(a, b), da.vdot(b, a).conj())
436
+
437
+
438
@pytest.mark.parametrize("shape1, shape2", [((20,), (6,)), ((4, 5), (2, 3))])
def test_outer(shape1, shape2):
    """``outer`` matches NumPy in both operand orders."""
    rng = np.random.default_rng(1337)

    x = 2 * rng.random(shape1) - 1
    y = 2 * rng.random(shape2) - 1

    a = da.from_array(x, chunks=3)
    b = da.from_array(y, chunks=3)

    for (np_lhs, np_rhs), (da_lhs, da_rhs) in [((x, y), (a, b)), ((y, x), (b, a))]:
        assert_eq(np.outer(np_lhs, np_rhs), da.outer(da_lhs, da_rhs))
450
+
451
+
452
@pytest.mark.parametrize(
    "func1d_name, func1d, specify_output_props",
    [
        ["ndim", lambda x: x.ndim, False],
        ["sum", lambda x: x.sum(), False],
        ["range", lambda x: [x.min(), x.max()], False],
        ["range2", lambda x: [[x.min(), x.max()], [x.max(), x.min()]], False],
        ["cumsum", lambda x: np.cumsum(x), True],
    ],
)
@pytest.mark.parametrize(
    "input_shape, axis",
    [[(10, 15, 20), 0], [(10, 15, 20), 1], [(10, 15, 20), 2], [(10, 15, 20), -1]],
)
def test_apply_along_axis(func1d_name, func1d, specify_output_props, input_shape, axis):
    """``apply_along_axis`` matches NumPy, with and without explicit output dtype/shape."""
    a = np.random.default_rng().integers(0, 10, input_shape)
    d = da.from_array(a, chunks=((5,) * len(input_shape)))

    output_shape = output_dtype = None
    if specify_output_props:
        # Run func1d on a single 1-D lane along ``axis`` to learn the shape
        # and dtype of its result.
        target = axis % a.ndim
        lane = tuple(slice(None) if dim == target else 0 for dim in range(a.ndim))
        sample = np.array(func1d(a[lane]))
        output_shape, output_dtype = sample.shape, sample.dtype

    actual = da.apply_along_axis(func1d, axis, d, dtype=output_dtype, shape=output_shape)
    expected = np.apply_along_axis(func1d, axis, a)
    assert_eq(actual, expected)
485
+
486
+
487
@pytest.mark.parametrize(
    "func_name, func",
    [
        ["sum0", lambda x, axis: x.sum(axis=axis)],
        ["sum1", lambda x, axis: x.sum(axis=axis, keepdims=True)],
        [
            "range",
            lambda x, axis: np.concatenate(
                [x.min(axis=axis, keepdims=True), x.max(axis=axis, keepdims=True)],
                axis=axis,
            ),
        ],
    ],
)
@pytest.mark.parametrize(
    "shape, axes",
    [
        [(10, 15, 20), tuple()],
        [(10, 15, 20), 0],
        [(10, 15, 20), (1,)],
        [(10, 15, 20), (-1, 1)],
        [(10, 15, 20), (2, 0, 1)],
    ],
)
def test_apply_over_axes(func_name, func, shape, axes):
    """``apply_over_axes`` matches NumPy for scalar, tuple and empty ``axes``."""
    source = np.random.default_rng().integers(0, 10, shape)
    chunked = da.from_array(source, chunks=((5,) * len(shape)))

    expected = np.apply_over_axes(func, source, axes)
    assert_eq(da.apply_over_axes(func, chunked, axes), expected)
516
+
517
+
518
@pytest.mark.parametrize(
    "shape, axis",
    [
        [(10, 15, 20), None],
        [(10, 15, 20), 0],
        [(10, 15, 20), 1],
        [(10, 15, 20), 2],
        [(10, 15, 20), -1],
    ],
)
def test_ptp(shape, axis):
    """``ptp`` matches NumPy along each axis and over the whole array."""
    source = np.random.default_rng().integers(0, 10, shape)
    chunked = da.from_array(source, chunks=((5,) * len(shape)))

    expected = np.ptp(source, axis)
    assert_eq(da.ptp(chunked, axis), expected)
533
+
534
+
535
@pytest.mark.parametrize(
    "shape, axis",
    [[(10, 15, 20), 0], [(10, 15, 20), 1], [(10, 15, 20), 2], [(10, 15, 20), -1]],
)
@pytest.mark.parametrize("n", [0, 1, 2])
def test_diff(shape, n, axis):
    """``diff`` of order ``n`` along ``axis`` matches NumPy."""
    source = np.random.default_rng().integers(0, 10, shape)
    chunked = da.from_array(source, chunks=((5,) * len(shape)))

    expected = np.diff(source, n, axis)
    assert_eq(da.diff(chunked, n, axis), expected)
545
+
546
+
547
@pytest.mark.parametrize("n", [0, 1, 2])
def test_diff_prepend(n):
    """``diff(..., prepend=...)`` matches NumPy for scalar and array prepends."""
    vec = np.arange(5) + 1
    dvec = da.from_array(vec, chunks=2)
    for prepend in (0, [0], [-1, 0]):
        assert_eq(da.diff(dvec, n, prepend=prepend), np.diff(vec, n, prepend=prepend))

    mat = np.arange(16).reshape(4, 4)
    dmat = da.from_array(mat, chunks=2)
    matrix_cases = [
        (1, 0),
        (1, [[0], [0], [0], [0]]),
        (0, 0),
        (0, [[0, 0, 0, 0]]),
    ]
    for axis, prepend in matrix_cases:
        assert_eq(
            da.diff(dmat, n, axis=axis, prepend=prepend),
            np.diff(mat, n, axis=axis, prepend=prepend),
        )

    # When order is 0 the result is the input array, it doesn't raise
    # an error
    if n > 0:
        with pytest.raises(ValueError):
            da.diff(dmat, n, prepend=np.zeros((3, 3)))
573
+
574
+
575
@pytest.mark.parametrize("n", [0, 1, 2])
def test_diff_append(n):
    """``diff(..., append=...)`` matches NumPy for scalar and array appends."""
    vec = np.arange(5) + 1
    dvec = da.from_array(vec, chunks=2)
    for append in (0, [0], [-1, 0]):
        assert_eq(da.diff(dvec, n, append=append), np.diff(vec, n, append=append))

    mat = np.arange(16).reshape(4, 4)
    dmat = da.from_array(mat, chunks=2)
    matrix_cases = [
        (1, 0),
        (1, [[0], [0], [0], [0]]),
        (0, 0),
        (0, [[0, 0, 0, 0]]),
    ]
    for axis, append in matrix_cases:
        assert_eq(
            da.diff(dmat, n, axis=axis, append=append),
            np.diff(mat, n, axis=axis, append=append),
        )

    # When order is 0 the result is the input array, it doesn't raise
    # an error
    if n > 0:
        with pytest.raises(ValueError):
            da.diff(dmat, n, append=np.zeros((3, 3)))
601
+
602
+
603
def test_diff_negative_order():
    """A negative difference order is rejected with ``ValueError``."""
    values = da.arange(10)
    with pytest.raises(ValueError):
        da.diff(values, -1)
606
+
607
+
608
@pytest.mark.parametrize("shape", [(10,), (10, 15)])
@pytest.mark.parametrize("to_end, to_begin", [[None, None], [0, 0], [[1, 2], [3, 4]]])
def test_ediff1d(shape, to_end, to_begin):
    """``ediff1d`` matches NumPy with and without ``to_end``/``to_begin``."""
    source = np.random.default_rng().integers(0, 10, shape)
    chunked = da.from_array(source, chunks=((5,) * len(shape)))

    expected = np.ediff1d(source, to_end, to_begin)
    assert_eq(da.ediff1d(chunked, to_end, to_begin), expected)
615
+
616
+
617
@pytest.mark.parametrize(
    "shape, varargs, axis",
    [
        [(10, 15, 20), (), None],
        [(10, 15, 20), (2,), None],
        [(10, 15, 20), (1.0, 1.5, 2.0), None],
        [(10, 15, 20), (), 0],
        [(10, 15, 20), (), 1],
        [(10, 15, 20), (), 2],
        [(10, 15, 20), (), -1],
        [(10, 15, 20), (), (0, 2)],
        [(10, 15, 20), (np.exp(np.arange(10)), np.exp(np.arange(20))), (0, 2)],
        [(10, 15, 20), (0.5, np.exp(np.arange(20))), (0, 2)],
        [(10, 15, 20), (np.exp(np.arange(20)),), -1],
    ],
)
@pytest.mark.parametrize("edge_order", [1, 2])
def test_gradient(shape, varargs, axis, edge_order):
    """``gradient`` matches NumPy for scalar/array spacings and single/multiple axes."""
    source = np.random.default_rng().integers(0, 10, shape)
    chunked = da.from_array(source, chunks=((5,) * len(shape)))

    expected = np.gradient(source, *varargs, axis=axis, edge_order=edge_order)
    actual = da.gradient(chunked, *varargs, axis=axis, edge_order=edge_order)

    if isinstance(axis, Number):
        # A single axis gives a single array.
        assert_eq(actual, expected)
        return

    # Otherwise the result is a sequence of per-axis gradients.
    assert len(actual) == len(expected)

    for actual_part, expected_part in zip(actual, expected):
        assert_eq(actual_part, expected_part)

    # The per-axis components can also be combined into a gradient magnitude.
    actual_norm = da.sqrt(sum(map(da.square, actual)))
    expected_norm = np.sqrt(sum(map(np.square, expected)))
    assert_eq(actual_norm, expected_norm)
650
+
651
+
652
def test_bincount():
    """``bincount`` with ``minlength`` matches NumPy, has known shape, and can be sliced."""
    values = np.array([2, 1, 5, 2, 1])
    dvalues = da.from_array(values, chunks=2)
    counts = da.bincount(dvalues, minlength=6)

    assert_eq(counts, np.bincount(values, minlength=6))
    assert same_keys(da.bincount(dvalues, minlength=6), counts)
    assert counts.shape == (6,)  # shape equal to minlength
    assert counts.chunks == ((6,),)

    # The name depends on minlength and is stable for equal arguments.
    name_for_6 = da.bincount(dvalues, minlength=6).name
    assert name_for_6 != da.bincount(dvalues, minlength=7).name
    assert name_for_6 == da.bincount(dvalues, minlength=6).name

    expected_output = np.array([0, 2, 2, 0, 0, 1], dtype=counts.dtype)
    assert_eq(counts[0:], expected_output)  # can bincount result be sliced
666
+
667
+
668
@pytest.mark.parametrize(
    "weights",
    [
        np.array([1, 2, 1, 0.5, 1], dtype=np.float32),
        np.array([1, 2, 1, 0, 1], dtype=np.int32),
    ],
)
def test_bincount_with_weights(weights):
    """Weighted ``bincount`` matches NumPy for float and integer weights."""
    values = np.array([2, 1, 5, 2, 1])
    dvalues = da.from_array(values, chunks=2)
    dweights = da.from_array(weights, chunks=2)

    counts = da.bincount(dvalues, weights=dweights, minlength=6)
    expected = np.bincount(values, weights=dweights.compute(), minlength=6)

    assert_eq(counts, expected)
    assert same_keys(da.bincount(dvalues, weights=dweights, minlength=6), counts)
683
+
684
+
685
def test_bincount_unspecified_minlength():
    """Without ``minlength`` the output length is only known after computing."""
    values = np.array([1, 1, 3, 7, 0])
    dvalues = da.from_array(values, chunks=2)
    counts = da.bincount(dvalues)

    assert_eq(counts, np.bincount(values))
    assert same_keys(da.bincount(dvalues), counts)
    computed = counts.compute()
    assert len(computed) == 8  # shape is (nan,) so must compute for len()
692
+
693
+
694
def test_digitize():
    """``digitize`` matches NumPy for 1-d and 2-d input, for both ``right`` settings."""
    x = np.array([2, 4, 5, 6, 1])
    bins = np.array([1, 2, 3, 4, 5])
    for chunks, right in itertools.product([2, 4], [False, True]):
        d = da.from_array(x, chunks=chunks)
        assert_eq(da.digitize(d, bins, right=right), np.digitize(x, bins, right=right))

    x = np.random.default_rng().random(size=(100, 100))
    bins = np.random.default_rng().random(size=13)
    bins.sort()
    chunkings = [(10, 10), (10, 20), (13, 17), (87, 54)]
    for chunks, right in itertools.product(chunkings, [False, True]):
        d = da.from_array(x, chunks=chunks)
        assert_eq(da.digitize(d, bins, right=right), np.digitize(x, bins, right=right))
709
+
710
+
711
@pytest.mark.parametrize(
    "a, a_chunks, v, v_chunks",
    [
        [[], 1, [], 1],
        [[0], 1, [0], 1],
        [[-10, 0, 10, 20, 30], 3, [11, 30], 2],
        [[-10, 0, 10, 20, 30], 3, [11, 30, -20, 1, -10, 10, 37, 11], 5],
        [[-10, 0, 10, 20, 30], 3, [[11, 30, -20, 1, -10, 10, 37, 11]], 5],
        [[-10, 0, 10, 20, 30], 3, [[7, 0], [-10, 10], [11, -1], [15, 15]], (2, 2)],
    ],
)
@pytest.mark.parametrize("side", ["left", "right"])
def test_searchsorted(a, a_chunks, v, v_chunks, side):
    """``searchsorted`` matches NumPy and keeps the shape and chunks of ``v``."""
    haystack = np.array(a)
    needles = np.array(v)

    dhaystack = da.asarray(haystack, chunks=a_chunks)
    dneedles = da.asarray(needles, chunks=v_chunks)

    result = da.searchsorted(dhaystack, dneedles, side)

    assert result.shape == dneedles.shape
    assert result.chunks == dneedles.chunks
    assert_eq(result, np.searchsorted(haystack, needles, side))
735
+
736
+
737
def test_searchsorted_sorter_not_implemented():
    """Passing ``sorter`` raises ``NotImplementedError``."""
    haystack = da.asarray([1, 0])
    needles = da.asarray([1])
    sorter = da.asarray([1, 0])
    with pytest.raises(NotImplementedError):
        da.searchsorted(haystack, needles, sorter=sorter)
740
+
741
+
742
def test_histogram():
    """Histogram counts of flat input match NumPy and add up to the sample count."""
    # Test for normal, flattened input
    n_samples = 100
    samples = da.random.default_rng().random(n_samples, chunks=10)
    edges = np.arange(0, 1.01, 0.01)
    dask_counts, _ = da.histogram(samples, bins=edges)
    numpy_counts, _ = np.histogram(samples, bins=edges)

    # Check if the sum of the bins equals the number of samples
    assert numpy_counts.sum(axis=0) == n_samples
    assert dask_counts.sum(axis=0) == n_samples
    assert_eq(dask_counts, numpy_counts)
    assert same_keys(da.histogram(samples, bins=edges)[0], dask_counts)
755
+
756
+
757
def test_histogram_alternative_bins_range():
    """An integer ``bins`` plus ``range`` gives the same counts and edges as NumPy."""
    samples = da.random.default_rng().random(100, chunks=10)
    options = {"bins": 10, "range": (0, 1)}

    dask_counts, dask_edges = da.histogram(samples, **options)
    numpy_counts, numpy_edges = np.histogram(samples, **options)

    assert_eq(dask_counts, numpy_counts)
    assert_eq(dask_edges, numpy_edges)
763
+
764
+
765
def test_histogram_bins_range_with_nan_array():
    """Histogram with an explicit range over data that contains a NaN."""
    # Regression test for issue #3977
    raw = np.array([-2, np.nan, 2])
    samples = da.from_array(raw, chunks=1)
    dask_counts, dask_edges = da.histogram(samples, bins=10, range=(-3, 3))
    ref_counts, ref_edges = np.histogram(samples, bins=10, range=(-3, 3))
    assert_eq(dask_counts, ref_counts)
    assert_eq(dask_edges, ref_edges)
772
+
773
+
774
def test_histogram_return_type():
    """Histogram over integer (``i4``) bin edges matches ``np.histogram``.

    The original assigned a float ``bins`` array and immediately overwrote
    it; that dead assignment is removed.
    """
    v = da.random.default_rng().random(100, chunks=10)
    # Check if return type is same as hist
    bins = np.arange(0, 11, 1, dtype="i4")
    assert_eq(da.histogram(v * 10, bins=bins)[0], np.histogram(v * 10, bins=bins)[0])
780
+
781
+
782
def test_histogram_extra_args_and_shapes():
    """Check ``density`` and ``weights`` handling for 1-D and 2-D inputs.

    Each case is a ``(values, bin_edges, weights)`` triple. The combined
    ``weights`` + ``density`` check used to compare ``da.histogram`` with
    itself, so it could never fail; it is now compared with ``np.histogram``.
    """
    # Check for extra args and shapes
    edges = np.arange(0, 1.01, 0.01)
    flat = da.random.default_rng().random(100, chunks=10)
    cases = [
        (flat, edges, da.ones(100, chunks=flat.chunks) * 5),
        (
            da.random.default_rng().random((50, 50), chunks=10),
            edges,
            da.ones((50, 50), chunks=10) * 5,
        ),
    ]

    # Distinct loop names avoid shadowing the arrays defined above.
    for values, case_edges, weights in cases:
        # density
        assert_eq(
            da.histogram(values, bins=case_edges, density=True)[0],
            np.histogram(values, bins=case_edges, density=True)[0],
        )

        # weights
        assert_eq(
            da.histogram(values, bins=case_edges, weights=weights)[0],
            np.histogram(values, bins=case_edges, weights=weights)[0],
        )

        # weights and density together; computed NumPy inputs ensure the
        # reference result is not itself produced by Dask.
        assert_eq(
            da.histogram(values, bins=case_edges, weights=weights, density=True)[0],
            np.histogram(
                values.compute(),
                bins=case_edges,
                weights=weights.compute(),
                density=True,
            )[0],
        )
812
+
813
+
814
def test_histogram_normed_deprecation():
    """``normed=True`` raises a ``ValueError`` that mentions ``density``."""
    values = da.arange(10)
    with pytest.raises(ValueError) as info:
        da.histogram(values, bins=[1, 2, 3], normed=True)

    message = str(info.value)
    assert "density" in message
    assert "deprecated" in message.lower()
821
+
822
+
823
@pytest.mark.parametrize(
    "bins, hist_range",
    [
        (None, None),
        (10, None),
        (10, 1),
        (None, (1, 10)),
        (10, [0, 1, 2]),
        (10, [0]),
        (10, np.array([[0, 1]])),
        (10, da.array([[0, 1]])),
        ([[0, 1, 2]], None),
        (np.array([[0, 1, 2]]), None),
        (da.array([[0, 1, 2]]), None),
    ],
)
def test_histogram_bin_range_raises(bins, hist_range):
    """Invalid ``bins``/``range`` combinations raise an error naming the argument."""
    samples = da.random.default_rng().random(10, chunks=2)
    with pytest.raises((ValueError, TypeError)) as info:
        da.histogram(samples, bins=bins, range=hist_range)
    message = str(info.value)
    # The message has to point at one of the two offending arguments.
    assert any(keyword in message for keyword in ("bins", "range"))
845
+
846
+
847
@pytest.mark.parametrize("density", [True, False])
@pytest.mark.parametrize("weighted", [True, False])
@pytest.mark.parametrize("non_delayed_i", [None, 0])
@pytest.mark.parametrize("delay_n_bins", [False, True])
def test_histogram_delayed_range(density, weighted, non_delayed_i, delay_n_bins):
    """Histogram whose ``range`` endpoints (and optionally bin count) are Dask objects."""
    n = 100
    values_np = np.random.default_rng().random(n)
    values_da = da.from_array(values_np, chunks=2)

    weights_np = None
    weights_da = None
    if weighted:
        weights_np = np.random.default_rng().random(n)
        weights_da = da.from_array(weights_np, chunks=values_da.chunks)

    lazy_range = [values_da.min(), values_da.max()]
    if non_delayed_i is not None:
        # Make the selected endpoint concrete while the other stays lazy.
        lazy_range[non_delayed_i] = lazy_range[non_delayed_i].compute()

    # A Dask scalar bin count is only exercised when density is off.
    if delay_n_bins and not density:
        n_bins = da.array(n)
    else:
        n_bins = n

    dask_hist, dask_edges = da.histogram(
        values_da,
        bins=n_bins,
        range=lazy_range,
        density=density,
        weights=weights_da,
    )

    ref_hist, ref_edges = np.histogram(
        values_np,
        bins=n,
        range=[values_np.min(), values_np.max()],
        density=density,
        weights=weights_np,
    )

    assert_eq(dask_hist, ref_hist)
    assert_eq(dask_edges, ref_edges)
881
+
882
+
883
@pytest.mark.parametrize("density", [True, False])
@pytest.mark.parametrize("weighted", [True, False])
def test_histogram_delayed_bins(density, weighted):
    """Histogram whose bin edges are supplied as a Dask array."""
    n = 100
    values_np = np.random.default_rng().random(n)
    edges_np = np.array([0, 0.2, 0.5, 0.8, 1])

    values_da = da.from_array(values_np, chunks=10)
    edges_da = da.from_array(edges_np, chunks=2)

    weights_np = None
    weights_da = None
    if weighted:
        weights_np = np.random.default_rng().random(n)
        weights_da = da.from_array(weights_np, chunks=values_da.chunks)

    dask_hist, returned_edges = da.histogram(
        values_da,
        bins=edges_da,
        range=[edges_da[0], edges_da[-1]],
        density=density,
        weights=weights_da,
    )

    ref_hist, ref_edges = np.histogram(
        values_np,
        bins=edges_np,
        range=[edges_np[0], edges_np[-1]],
        density=density,
        weights=weights_np,
    )

    # The very same edges object must be handed back.
    assert edges_da is returned_edges
    # The HLG that is assembled from the bins and the range triggers a sanity
    # check because they contain duplicate keys and the HLG dependencies are not
    # reflecting this properly. Graph is perfectly fine but the check fails.
    assert_eq(dask_hist, ref_hist, check_graph=False)
    assert_eq(returned_edges, ref_edges, check_graph=False)
919
+
920
+
921
def test_histogram_delayed_n_bins_raises_with_density():
    """A Dask scalar ``bins`` combined with ``density=True`` is rejected."""
    samples = da.random.default_rng().random(10, chunks=2)
    expected_message = "`bins` cannot be a scalar Dask object"
    with pytest.raises(NotImplementedError, match=expected_message):
        da.histogram(samples, bins=da.array(10), range=[0, 1], density=True)
925
+
926
+
927
@pytest.mark.parametrize("weights", [True, False])
@pytest.mark.parametrize("density", [True, False])
@pytest.mark.parametrize("bins", [(5, 6), 5])
def test_histogram2d(weights, density, bins):
    """``da.histogram2d`` with integer bin counts and an explicit range."""
    rng = da.random.default_rng()
    n = 800
    limits = ((0, 1), (0, 1))
    x = rng.uniform(0, 1, size=(n,), chunks=(200,))
    y = rng.uniform(0, 1, size=(n,), chunks=(200,))
    w = rng.uniform(0.2, 1.1, size=(n,), chunks=(200,)) if weights else None

    # Arguments shared by every call that receives the Dask inputs.
    options = dict(bins=bins, range=limits, density=density, weights=w)
    dask_hist, _, _ = da.histogram2d(x, y, **options)
    np_on_dask, _, _ = np.histogram2d(x, y, **options)
    np_on_numpy, _, _ = np.histogram2d(
        x.compute(),
        y.compute(),
        bins=bins,
        range=limits,
        density=density,
        weights=w.compute() if weights else None,
    )

    assert_eq(dask_hist, np_on_dask)
    assert_eq(dask_hist, np_on_numpy)
    if not weights and not density:
        # Plain counts must add up to the number of samples.
        assert dask_hist.sum() == n
        assert np_on_dask.sum() == n
    assert same_keys(da.histogram2d(x, y, **options)[0], dask_hist)
    assert dask_hist.compute().shape == np_on_numpy.shape
958
+
959
+
960
@pytest.mark.parametrize("weights", [True, False])
@pytest.mark.parametrize("density", [True, False])
def test_histogram2d_array_bins(weights, density):
    """``da.histogram2d`` with explicit bin-edge lists for both axes."""
    rng = da.random.default_rng()
    n = 800
    xbins = [0.0, 0.2, 0.6, 0.9, 1.0]
    ybins = [0.0, 0.1, 0.4, 0.5, 1.0]
    edges = [xbins, ybins]
    x = rng.uniform(0, 1, size=(n,), chunks=(200,))
    y = rng.uniform(0, 1, size=(n,), chunks=(200,))
    w = rng.uniform(0.2, 1.1, size=(n,), chunks=(200,)) if weights else None

    # Arguments shared by every call that receives the Dask inputs.
    options = dict(bins=edges, density=density, weights=w)
    dask_hist, _, _ = da.histogram2d(x, y, **options)
    np_on_dask, _, _ = np.histogram2d(x, y, **options)
    np_on_numpy, _, _ = np.histogram2d(
        x.compute(),
        y.compute(),
        bins=edges,
        density=density,
        weights=w.compute() if weights else None,
    )

    assert_eq(dask_hist, np_on_dask)
    assert_eq(dask_hist, np_on_numpy)
    if not weights and not density:
        # Plain counts must add up to the number of samples.
        assert dask_hist.sum() == n
        assert np_on_dask.sum() == n
    assert same_keys(da.histogram2d(x, y, **options)[0], dask_hist)
    assert dask_hist.compute().shape == np_on_numpy.shape
990
+
991
+
992
def test_histogramdd():
    """``da.histogramdd`` on a columnar sample with explicit edges per dimension."""
    n_rows, n_cols = 800, 3
    sample = da.random.default_rng().uniform(0, 1, size=(n_rows, n_cols), chunks=(200, 3))
    edges = [[0, 0.5, 1], [0, 0.25, 0.85, 1], [0, 0.5, 0.8, 1]]

    dask_hist, _ = da.histogramdd(sample, bins=edges)
    np_on_dask, _ = np.histogramdd(sample, bins=edges)
    np_on_numpy, _ = np.histogramdd(sample.compute(), bins=edges)

    assert_eq(dask_hist, np_on_dask)
    assert_eq(dask_hist, np_on_numpy)
    # Every row of the sample is counted exactly once.
    assert dask_hist.sum() == n_rows
    assert np_on_dask.sum() == n_rows
    assert same_keys(da.histogramdd(sample, bins=edges)[0], dask_hist)
    assert dask_hist.compute().shape == np_on_numpy.shape
1005
+
1006
+
1007
def test_histogramdd_seq_of_arrays():
    """``da.histogramdd`` accepts a sequence of 1-D coordinate arrays."""
    rng = da.random.default_rng()
    n = 800
    x = rng.uniform(size=(n,), chunks=200)
    y = rng.uniform(size=(n,), chunks=200)
    x_edges = [0.0, 0.25, 0.75, 1.0]
    y_edges = [0.0, 0.30, 0.70, 0.8, 1.0]
    edges = [x_edges, y_edges]

    dask_hist, _ = da.histogramdd([x, y], bins=edges)
    np_on_dask, _ = np.histogramdd([x, y], bins=edges)
    np_on_numpy, _ = np.histogramdd((x.compute(), y.compute()), bins=edges)

    assert_eq(dask_hist, np_on_dask)
    assert_eq(dask_hist, np_on_numpy)
1019
+
1020
+
1021
def test_histogramdd_alternative_bins_range():
    """``da.histogramdd`` with per-dimension and scalar bin counts plus ranges."""
    # test for normal input
    n_rows, n_cols = 600, 3
    sample = da.random.default_rng().uniform(
        0, 1, size=(n_rows, n_cols), chunks=((200, 200, 200), (3,))
    )
    per_dim_bins = (3, 5, 4)
    ranges = ((0, 1),) * len(per_dim_bins)

    dask_hist, _ = da.histogramdd(sample, bins=per_dim_bins, range=ranges)
    np_on_dask, _ = np.histogramdd(sample, bins=per_dim_bins, range=ranges)
    np_on_numpy, _ = np.histogramdd(sample.compute(), bins=per_dim_bins, range=ranges)
    assert_eq(dask_hist, np_on_dask)
    assert_eq(dask_hist, np_on_numpy)

    # Same check with one bin count shared by all dimensions.
    scalar_bins = 4
    dask_hist, _ = da.histogramdd(sample, bins=scalar_bins, range=ranges)
    np_on_dask, _ = np.histogramdd(sample, bins=scalar_bins, range=ranges)
    assert_eq(dask_hist, np_on_dask)

    assert dask_hist.sum() == n_rows
    assert np_on_dask.sum() == n_rows
    assert same_keys(da.histogramdd(sample, bins=scalar_bins, range=ranges)[0], dask_hist)
1040
+
1041
+
1042
def test_histogramdd_weighted():
    """Weighted ``da.histogramdd`` for per-dimension and scalar bin counts."""
    rng = da.random.default_rng()
    # test for normal input
    n_rows, n_cols = 600, 3
    sample = rng.uniform(0, 1, size=(n_rows, n_cols), chunks=((200, 200, 200), (3,)))
    w = rng.uniform(0.5, 0.8, size=(n_rows,), chunks=200)
    ranges = ((0, 1),) * 3

    # First one bin count per dimension, then a single shared count.
    for bins in ((3, 5, 4), 4):
        dask_hist, _ = da.histogramdd(sample, bins=bins, range=ranges, weights=w)
        np_on_dask, _ = np.histogramdd(sample, bins=bins, range=ranges, weights=w)
        np_on_numpy, _ = np.histogramdd(
            sample.compute(), bins=bins, range=ranges, weights=w.compute()
        )
        assert_eq(dask_hist, np_on_dask)
        assert_eq(dask_hist, np_on_numpy)
1061
+
1062
+
1063
def test_histogramdd_density():
    """``density=True`` and ``normed=True`` give the same result as NumPy's density."""
    n_rows, n_cols = 800, 3
    sample = da.random.default_rng().uniform(0, 1, size=(n_rows, n_cols), chunks=(200, 3))
    edges = [[0, 0.5, 1], [0, 0.25, 0.85, 1], [0, 0.5, 0.8, 1]]

    dask_density, _ = da.histogramdd(sample, bins=edges, density=True)
    references = (
        np.histogramdd(sample, bins=edges, density=True)[0],
        da.histogramdd(sample, bins=edges, normed=True)[0],
        np.histogramdd(sample.compute(), bins=edges, density=True)[0],
    )
    for reference in references:
        assert_eq(dask_density, reference)
    assert same_keys(da.histogramdd(sample, bins=edges, density=True)[0], dask_density)
1075
+
1076
+
1077
def test_histogramdd_weighted_density():
    """Weighted density via ``density=True`` and via ``normed=True`` agree."""
    rng = da.random.default_rng()
    n_rows, n_cols = 1200, 4
    sample = rng.standard_normal(size=(n_rows, n_cols), chunks=(200, 4))
    w = rng.uniform(0.5, 1.2, size=(n_rows,), chunks=200)
    bins = (5, 6, 7, 8)
    ranges = ((-4, 4),) * len(bins)
    shared = dict(bins=bins, range=ranges, weights=w)

    dask_density, _ = da.histogramdd(sample, density=True, **shared)
    np_density, _ = np.histogramdd(sample, density=True, **shared)
    dask_normed, _ = da.histogramdd(sample, normed=True, **shared)

    assert_eq(dask_density, np_density)
    assert_eq(dask_density, dask_normed)
1089
+
1090
+
1091
def test_histogramdd_raises_incompat_sample_chunks():
    """A sample chunked along its second axis is rejected."""
    sample = da.random.default_rng().random(size=(10, 3), chunks=(5, 1))
    expected_message = "Input array can only be chunked along the 0th axis"
    with pytest.raises(ValueError, match=expected_message):
        da.histogramdd(sample, bins=10, range=((0, 1),) * 3)
1095
+
1096
+
1097
def test_histogramdd_raises_incompat_multiarg_chunks():
    """Coordinate arrays with different chunking are rejected."""
    rng = da.random.default_rng()
    x = rng.random(size=(10,), chunks=2)
    y = rng.random(size=(10,), chunks=2)
    # Only the third coordinate uses a different chunk size.
    z = rng.random(size=(10,), chunks=5)
    expected_message = "All coordinate arrays must be chunked identically."
    with pytest.raises(ValueError, match=expected_message):
        da.histogramdd((x, y, z), bins=(3,) * 3, range=((0, 1),) * 3)
1104
+
1105
+
1106
def test_histogramdd_raises_incompat_weight_chunks():
    """Weights chunked differently from the sample are rejected for both input forms."""
    rng = da.random.default_rng()
    x = rng.random(size=(10,), chunks=2)
    y = rng.random(size=(10,), chunks=2)
    columnar = da.atleast_2d((x, y)).T.rechunk((2, 2))
    w = rng.random(size=(10,), chunks=5)
    bins = (3,) * 2
    ranges = ((0, 1),) * 2

    # Sequence-of-arrays input.
    with pytest.raises(
        ValueError,
        match="Input arrays and weights must have the same shape and chunk structure.",
    ):
        da.histogramdd((x, y), bins=bins, range=ranges, weights=w)

    # Single columnar array input.
    with pytest.raises(
        ValueError,
        match="Input array and weights must have the same shape and chunk structure along the first dimension.",
    ):
        da.histogramdd(columnar, bins=bins, range=ranges, weights=w)
1122
+
1123
+
1124
def test_histogramdd_raises_incompat_bins_or_range():
    """Mismatched ``bins`` or ``range`` specifications raise ``ValueError``."""
    sample = da.random.default_rng().random(size=(10, 4), chunks=(5, 4))
    four_bins = (2, 3, 4, 5)
    four_ranges = ((0, 1),) * len(four_bins)

    # bad number of bins defined (should be data.shape[1])
    with pytest.raises(
        ValueError,
        match="The dimension of bins must be equal to the dimension of the sample.",
    ):
        da.histogramdd(sample, bins=(2, 3, 4), range=four_ranges)

    # one range per dimension is required.
    with pytest.raises(
        ValueError,
        match="range argument requires one entry, a min max pair, per dimension.",
    ):
        da.histogramdd(sample, bins=four_bins, range=((0, 1),) * 3)

    # has range elements that are not pairs
    with pytest.raises(ValueError, match="range argument should be a sequence of pairs"):
        da.histogramdd(sample, bins=four_bins, range=((0, 1), (0, 1, 2), 3, 5))
1149
+
1150
+
1151
def test_histogramdd_raise_normed_and_density():
    """Passing both ``normed`` and ``density`` raises ``TypeError``."""
    sample = da.random.default_rng().random(size=(10, 3), chunks=(5, 3))
    options = dict(bins=(4, 5, 6), range=((0, 1),) * 3)
    with pytest.raises(TypeError, match="Cannot specify both 'normed' and 'density'"):
        da.histogramdd(sample, normed=True, density=True, **options)
1157
+
1158
+
1159
def test_histogramdd_raise_incompat_shape():
    """A single 1-D or 3-D array is rejected as a sample."""
    expected_message = "Single array input to histogramdd should be columnar"
    bad_inputs = [
        # 1D
        dict(size=(10,), chunks=(2,)),
        # 3D (not columnar)
        dict(size=(4, 4, 4), chunks=(2, 2, 2)),
    ]
    for layout in bad_inputs:
        sample = da.random.default_rng().random(**layout)
        with pytest.raises(ValueError, match=expected_message):
            da.histogramdd(sample, bins=4, range=((-3, 3),))
1168
+
1169
+
1170
def test_histogramdd_edges():
    """Bin edges returned by ``da.histogramdd`` match those from NumPy."""
    sample = da.random.default_rng().random(size=(10, 3), chunks=(5, 3))
    explicit_edges = [
        np.array([0.1, 0.3, 0.8, 1.0]),
        np.array([0.2, 0.3, 0.8, 0.9]),
        np.array([0.1, 0.5, 0.7]),
    ]

    # passing bins as an array of bin edges.
    _, dask_edges = da.histogramdd(sample, bins=explicit_edges)
    _, ref_edges = np.histogramdd(sample.compute(), bins=explicit_edges)
    for got, want in zip(dask_edges, ref_edges):
        assert_eq(got, want)

    # passing bins as an int with range definitions
    ranges = ((0, 1),) * 3
    _, dask_edges = da.histogramdd(sample, bins=5, range=ranges)
    _, ref_edges = np.histogramdd(sample.compute(), bins=5, range=ranges)
    for got, want in zip(dask_edges, ref_edges):
        assert_eq(got, want)
1187
+
1188
+
1189
def test_cov():
    """``da.cov`` agrees with ``np.cov`` for common argument combinations."""
    matrix_np = np.arange(56).reshape((7, 8))
    matrix_da = da.from_array(matrix_np, chunks=(4, 4))

    assert_eq(da.cov(matrix_da), np.cov(matrix_np))
    assert_eq(da.cov(matrix_da, rowvar=0), np.cov(matrix_np, rowvar=0))
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=RuntimeWarning)  # dof <= 0 for slice
        assert_eq(da.cov(matrix_da, ddof=10), np.cov(matrix_np, ddof=10))
    assert_eq(da.cov(matrix_da, bias=1), np.cov(matrix_np, bias=1))
    assert_eq(da.cov(matrix_da, matrix_da), np.cov(matrix_np, matrix_np))

    vector_np = np.arange(8)
    vector_da = da.from_array(vector_np, chunks=(4,))

    # Mixed 2-D / 1-D operands, in both argument orders.
    assert_eq(da.cov(matrix_da, vector_da), np.cov(matrix_np, vector_np))
    assert_eq(da.cov(vector_da, matrix_da), np.cov(vector_np, matrix_np))

    # A non-integer ddof is rejected.
    with pytest.raises(ValueError):
        da.cov(matrix_da, ddof=1.5)
1209
+
1210
+
1211
@pytest.mark.skipif(not NUMPY_GE_220, reason="fweights is not an kwarg prior to numpy 2.2")
def test_cov_fweights():
    """``da.cov`` with frequency weights, including invalid weight shapes."""
    pairs = da.array([[0, 2], [1, 1], [2, 0]]).T
    unweighted_cov = da.array([[1.0, -1.0], [-1.0, 1.0]])
    row = da.array([0.0, 1.0, 2.0], ndmin=2)
    frequencies = da.array([1, 4, 1])
    # The same observations as ``row``, with the middle value repeated 4 times.
    row_repeated = da.array([[0.0], [1.0], [1.0], [1.0], [1.0], [2.0]]).T
    weighted_cov = da.array([[0.4, -0.4], [-0.4, 0.4]])
    unit_frequencies = np.ones(3, dtype=np.int_)

    got = da.cov(row, fweights=frequencies)
    want = np.cov(row.compute(), fweights=frequencies.compute())
    assert_eq(got, want)

    assert allclose(da.cov(row, fweights=frequencies), da.cov(row_repeated))
    assert allclose(da.cov(pairs, fweights=frequencies), weighted_cov)
    assert allclose(da.cov(pairs, fweights=unit_frequencies), unweighted_cov)

    # Two-dimensional weights, then weights of the wrong length.
    for bad_shape in ((2, 3), 2):
        bad_weights = da.ones(bad_shape, dtype=np.int_)
        with pytest.raises(RuntimeError):
            da.cov(pairs, fweights=bad_weights)
1236
+
1237
+
1238
@pytest.mark.skipif(not NUMPY_GE_220, reason="aweights is not an kwarg prior to numpy 2.2")
def test_cov_aweights():
    """``da.cov`` with analytical weights, including invalid weight shapes."""
    pairs = da.array([[0, 2], [1, 1], [2, 0]]).T
    unweighted_cov = da.array([[1.0, -1.0], [-1.0, 1.0]])

    # Test basic functionality with aweights
    aweights = da.array([0.5, 2.0, 0.5])  # Analytical weights
    unit_aweights = np.ones(3, dtype=np.float64)

    # With unit weights, should match unweighted result
    assert allclose(da.cov(pairs, aweights=unit_aweights), unweighted_cov)

    # Test that aweights affects the covariance calculation
    with_weights = da.cov(pairs, aweights=aweights)
    without_weights = da.cov(pairs)
    # Results should be different when using non-unit weights
    assert not allclose(with_weights, without_weights)

    # Test with different weight patterns
    equal_weights = da.array([1.0, 1.0, 1.0])
    assert allclose(da.cov(pairs, aweights=equal_weights), da.cov(pairs))

    # Multidimensional aweights should raise RuntimeError, and so should
    # aweights of the wrong length.
    for bad_shape in ((2, 3), 2):
        bad_weights = da.ones(bad_shape, dtype=np.float64)
        with pytest.raises(RuntimeError):
            da.cov(pairs, aweights=bad_weights)
1269
+
1270
+
1271
@pytest.mark.skipif(not NUMPY_GE_220, reason="fweights and aweights are not kwargs prior to numpy 2.2")
def test_cov_fweights_aweights_combined():
    """``da.cov`` accepts frequency and analytical weights at the same time."""
    pairs = da.array([[0, 2], [1, 1], [2, 0]]).T

    # Test combining both frequency and analytical weights
    fweights = da.array([1, 2, 1])  # Frequency weights (integers)
    aweights = da.array([0.5, 1.0, 2.0])  # Analytical weights (floats)

    # Should work without error when both are provided
    combined = da.cov(pairs, fweights=fweights, aweights=aweights)
    assert combined.shape == (2, 2)

    # Result should be different from using either weight alone
    single_weight_results = (
        da.cov(pairs, fweights=fweights),
        da.cov(pairs, aweights=aweights),
    )
    for single in single_weight_results:
        assert not allclose(combined, single)
1289
+
1290
+
1291
def test_corrcoef():
    """``da.corrcoef`` agrees with ``np.corrcoef``."""
    matrix_np = np.arange(56).reshape((7, 8))
    matrix_da = da.from_array(matrix_np, chunks=(4, 4))

    assert_eq(da.corrcoef(matrix_da), np.corrcoef(matrix_np))
    assert_eq(da.corrcoef(matrix_da, rowvar=0), np.corrcoef(matrix_np, rowvar=0))
    assert_eq(da.corrcoef(matrix_da, matrix_da), np.corrcoef(matrix_np, matrix_np))

    vector_np = np.arange(8)
    vector_da = da.from_array(vector_np, chunks=(4,))

    # Mixed 2-D / 1-D operands, in both argument orders.
    assert_eq(da.corrcoef(matrix_da, vector_da), np.corrcoef(matrix_np, vector_np))
    assert_eq(da.corrcoef(vector_da, matrix_da), np.corrcoef(vector_np, matrix_np))

    single_row_da = da.array([[1, 2]])
    single_row_np = np.array([[1, 2]])

    # This case only runs when NUMPY_GE_220 is true; a RuntimeWarning is expected.
    if NUMPY_GE_220:
        with pytest.warns(RuntimeWarning):
            assert_eq(
                da.corrcoef(single_row_da, rowvar=False),
                np.corrcoef(single_row_np, rowvar=False),
            )
1311
+
1312
+
1313
def test_round():
    """``Array.round`` matches ``ndarray.round`` and ``da.round``."""
    values_np = np.random.default_rng().random(10)
    values_da = da.from_array(values_np, chunks=4)

    for decimals in (0, 1, 4, 5):
        assert_eq(values_np.round(decimals), values_da.round(decimals))

    # The method and the module-level function must agree.
    assert_eq(values_da.round(2), da.round(values_da, 2))
1321
+
1322
+
1323
@pytest.mark.parametrize("return_index", [False, True])
@pytest.mark.parametrize("return_inverse", [False, True])
@pytest.mark.parametrize("return_counts", [False, True])
def test_unique_kwargs(return_index, return_inverse, return_counts):
    """``da.unique`` returns the same outputs as ``np.unique`` for every flag combination."""
    flags = dict(
        return_index=return_index,
        return_inverse=return_inverse,
        return_counts=return_counts,
    )

    values_np = np.array([1, 2, 4, 4, 5, 2])
    values_da = da.from_array(values_np, chunks=(3,))

    ref = np.unique(values_np, **flags)
    got = da.unique(values_da, **flags)

    if not (return_index or return_inverse or return_counts):
        # With no extra outputs requested a bare array is returned; wrap it
        # so the comparison loop below handles both cases.
        assert isinstance(ref, np.ndarray)
        assert isinstance(got, da.Array)

        ref = (ref,)
        got = (got,)

    assert len(ref) == len(got)

    if return_inverse:
        # The inverse follows the index output when that is requested too.
        inverse_pos = 1 + int(return_index)
        assert (values_da.size,) == got[inverse_pos].shape

    for ref_item, got_item in zip(ref, got):
        assert_eq(got_item, ref_item)
1354
+
1355
+
1356
@pytest.mark.parametrize("seed", [23, 796])
@pytest.mark.parametrize(
    "shape, chunks",
    [[(10,), (5,)], [(10,), (3,)], [(4, 5), (3, 2)], [(20, 20), (4, 5)]],
)
def test_unique_rand(seed, shape, chunks):
    """``da.unique`` with all optional outputs on seeded random integers."""
    rng = np.random.default_rng(seed)

    values_np = rng.integers(0, 10, size=shape)
    values_da = da.from_array(values_np, chunks=chunks)

    all_outputs = dict(return_index=True, return_inverse=True, return_counts=True)
    ref = np.unique(values_np, **all_outputs)
    got = da.unique(values_da, **all_outputs)

    # Compare the four outputs position by position.
    for position in range(4):
        assert_eq(got[position], ref[position])
1374
+
1375
+
1376
@pytest.mark.parametrize("seed", [23, 796])
@pytest.mark.parametrize("low, high", [[0, 10]])
@pytest.mark.parametrize(
    "elements_shape, elements_chunks",
    [[(10,), (5,)], [(10,), (3,)], [(4, 5), (3, 2)], [(20, 20), (4, 5)]],
)
@pytest.mark.parametrize(
    "test_shape, test_chunks",
    [[(10,), (5,)], [(10,), (3,)], [(4, 5), (3, 2)], [(20, 20), (4, 5)]],
)
@pytest.mark.parametrize("invert", [True, False])
def test_isin_rand(seed, low, high, elements_shape, elements_chunks, test_shape, test_chunks, invert):
    """``da.isin`` matches ``np.isin`` on seeded random integer arrays."""
    rng = np.random.default_rng(seed)

    elements_np = rng.integers(low, high, size=elements_shape)
    elements_da = da.from_array(elements_np, chunks=elements_chunks)

    # The test values are the drawn integers shifted down by 5.
    candidates_np = rng.integers(low, high, size=test_shape) - 5
    candidates_da = da.from_array(candidates_np, chunks=test_chunks)

    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=da.PerformanceWarning)
        expected = np.isin(elements_np, candidates_np, invert=invert)
        actual = da.isin(elements_da, candidates_da, invert=invert)
    assert_eq(expected, actual)
1401
+
1402
+
1403
@pytest.mark.parametrize("assume_unique", [True, False])
def test_isin_assume_unique(assume_unique):
    """``da.isin`` gives the NumPy result for either ``assume_unique`` setting."""
    elements_np = np.arange(10)
    elements_da = da.from_array(elements_np, chunks=(5,))

    evens = np.arange(0, 10, 2)
    expected = np.isin(elements_np, evens, assume_unique=assume_unique)
    actual = da.isin(elements_da, evens, assume_unique=assume_unique)
    assert_eq(expected, actual)
1412
+
1413
+
1414
+ def _maybe_len(l):
1415
+ try:
1416
+ return len(l)
1417
+ except TypeError:
1418
+ return 0
1419
+
1420
+
1421
@pytest.mark.parametrize("chunks", [(4, 6), (2, 6)])
@pytest.mark.parametrize("shift", [3, 7, 9, (3, 9), (7, 2)])
@pytest.mark.parametrize("axis", [None, 0, 1, -1, (0, 1), (1, 0)])
def test_roll(chunks, shift, axis):
    """``da.roll`` matches ``np.roll``, or raises when ``shift`` and ``axis`` lengths differ."""
    grid_np = np.random.default_rng().integers(10, size=(4, 6))
    grid_da = da.from_array(grid_np, chunks=chunks)

    if _maybe_len(shift) == _maybe_len(axis):
        assert_eq(np.roll(grid_np, shift, axis), da.roll(grid_da, shift, axis))
        return

    # Mismatched lengths: TypeError is expected without an axis, else ValueError.
    expected_error = TypeError if axis is None else ValueError
    with pytest.raises(expected_error):
        da.roll(grid_da, shift, axis)
1433
+
1434
+
1435
def test_roll_always_results_in_a_new_array():
    """``da.roll`` returns an object distinct from its input."""
    original = da.arange(2, 3)
    rolled = da.roll(original, 1)
    assert rolled is not original
1439
+
1440
+
1441
def test_roll_works_even_if_shape_is_0():
    """Rolling an empty array works and matches NumPy."""
    empty_np = np.zeros(0)
    empty_da = da.zeros(0)
    assert_eq(np.roll(empty_np, 0), da.roll(empty_da, 0))
1445
+
1446
+
1447
@pytest.mark.parametrize("shape", [(10,), (5, 10), (5, 10, 10)])
def test_shape_and_ndim(shape):
    """``np.shape`` and ``np.ndim`` work on Dask arrays."""
    for_shape = da.random.default_rng().random(shape)
    assert np.shape(for_shape) == shape

    # A fresh array is built for the ndim check, as in the shape check.
    for_ndim = da.random.default_rng().random(shape)
    assert np.ndim(for_ndim) == len(shape)
1454
+
1455
+
1456
@pytest.mark.parametrize("shape", [((12,), (12,)), ((4, 3), (3, 4)), ((12,), (1, 6, 2))])
@pytest.mark.parametrize("reverse", [True, False])
def test_union1d(shape, reverse):
    """``np.union1d`` on Dask arrays returns a Dask array equal to the NumPy result."""
    first_shape, second_shape = shape
    first_np = np.arange(12).reshape(first_shape)
    second_np = np.arange(6, 18).reshape(second_shape)

    # Optionally flip the first operand along its leading axis.
    first_np = first_np[::-1] if reverse else first_np

    first_da = da.from_array(first_np)
    second_da = da.from_array(second_np)

    result = np.union1d(first_da, second_da)
    expected = np.union1d(first_np, second_np)

    assert isinstance(result, da.Array)

    assert_eq(result, expected)
1475
+
1476
+
1477
def test_ravel():
    """``ravel``/``flatten`` on 0-d through 3-d inputs match NumPy."""
    grid_np = np.random.default_rng().integers(10, size=(4, 6))

    # 2d
    for chunking in [(4, 6), (2, 6)]:
        grid_da = da.from_array(grid_np, chunks=chunking)
        assert_eq(grid_np.ravel(), grid_da.ravel())
        # Raveling adds one graph entry per chunk along the first axis.
        n_row_chunks = len(grid_da.chunks[0])
        assert len(grid_da.ravel().dask) == len(grid_da.dask) + n_row_chunks

    # ``grid_da`` below is the array from the last loop iteration.
    # 0d
    assert_eq(grid_np[0, 0].ravel(), grid_da[0, 0].ravel())

    # 1d
    flat_da = grid_da.ravel()
    assert_eq(flat_da.ravel(), flat_da)

    # 3d
    cube_np = np.random.default_rng().integers(10, size=(2, 3, 4))
    for chunking in [4, (1, 3, 4)]:
        cube_da = da.from_array(cube_np, chunks=chunking)
        assert_eq(cube_np.ravel(), cube_da.ravel())

    assert_eq(cube_np.flatten(), cube_da.flatten())
    assert_eq(np.ravel(cube_np), da.ravel(cube_da))
1501
+
1502
+
1503
def test_ravel_1D_no_op():
    """Raveling a 1-D array matches NumPy, with known and unknown sizes."""
    values_np = np.random.default_rng().integers(10, size=100)
    values_da = da.from_array(values_np, chunks=10)
    # known dims
    assert_eq(values_da.ravel(), values_np.ravel())
    # Unknown dims
    filtered_da = values_da[values_da > 2]
    filtered_np = values_np[values_np > 2]
    assert_eq(filtered_da.ravel(), filtered_np.ravel())
1510
+
1511
+
1512
def test_ravel_with_array_like():
    """``da.ravel`` accepts plain Python inputs and returns a Dask array."""
    array_likes = (
        0,  # int
        [0, 0],  # list
        (0, 0),  # tuple
        [(0,), (0,)],  # nested i.e. tuples in list
    )
    for value in array_likes:
        assert_eq(np.ravel(value), da.ravel(value))
        assert isinstance(da.ravel(value), da.Array)
1528
+
1529
+
1530
@pytest.mark.parametrize("axis", [None, 0, 1, -1, (0, 1), (0, 2), (1, 2), 2])
def test_expand_dims(axis):
    """``da.expand_dims`` matches NumPy and rejects ``None`` or out-of-range axes."""
    values_np = np.arange(10)
    values_da = da.from_array(values_np, chunks=(3,))

    if axis is None:
        with pytest.raises(TypeError):
            da.expand_dims(values_da, axis=axis)
        return

    if axis == 2:
        # A lone axis of 2 is expected to fail for a 1-D input.
        with pytest.raises(AxisError):
            da.expand_dims(values_da, axis=axis)
        return

    expected = np.expand_dims(values_np, axis=axis)
    expanded = da.expand_dims(values_da, axis=axis)

    assert_eq(expanded, expected)
    assert same_keys(expanded, da.expand_dims(values_da, axis=axis))
1547
+
1548
+
1549
@pytest.mark.parametrize("is_func", [True, False])
@pytest.mark.parametrize("axis", [None, 0, -1, (0, -1)])
def test_squeeze(is_func, axis):
    """``squeeze`` as function and as method matches NumPy and drops the right chunks."""
    base_np = np.arange(10)[None, :, None, None]
    base_da = da.from_array(base_np, chunks=(1, 3, 1, 1))

    if is_func:
        squeezed_np = np.squeeze(base_np, axis=axis)
        squeezed_da = da.squeeze(base_da, axis=axis)
    else:
        squeezed_np = base_np.squeeze(axis=axis)
        squeezed_da = base_da.squeeze(axis=axis)

    assert_eq(squeezed_da, squeezed_np)
    assert same_keys(squeezed_da, da.squeeze(base_da, axis=axis))

    # Normalise ``axis`` to a tuple of non-negative axis numbers.
    if axis is None:
        candidates = tuple(range(base_np.ndim))
    else:
        as_tuple = axis if isinstance(axis, tuple) else (axis,)
        candidates = tuple(i % base_np.ndim for i in as_tuple)
    # Only candidate axes that hold a single chunk count as removed.
    removed = tuple(
        i for i, c in enumerate(base_da.chunks) if i in candidates and len(c) == 1
    )

    expected_chunks = tuple(c for i, c in enumerate(base_da.chunks) if i not in removed)
    assert squeezed_da.chunks == expected_chunks
1574
+
1575
+
1576
@pytest.mark.parametrize("shape", [(1,), (1, 1)])
def test_squeeze_1d_array(shape):
    """Squeezing an all-length-one array still yields array objects."""
    filled_np = np.full(shape=shape, fill_value=2)
    expected = np.squeeze(filled_np)
    filled_da = da.from_array(filled_np, chunks=1)
    squeezed = da.squeeze(filled_da)
    # Both the lazy result and its computed value must be arrays.
    assert isinstance(squeezed, da.Array)
    assert isinstance(squeezed.compute(), np.ndarray)
    assert_eq(squeezed, expected)
1585
+
1586
+
1587
def test_vstack():
    """``da.vstack`` matches ``np.vstack`` for 1-D and mixed 1-D/2-D inputs."""
    ramp_np, ones_np = np.arange(5), np.ones(5)
    ramp_da, ones_da = da.arange(5, chunks=2), da.ones(5, chunks=2)

    assert_eq(np.vstack((ramp_np, ones_np)), da.vstack((ramp_da, ones_da)))
    assert_eq(
        np.vstack((ramp_np, ones_np[None, :])),
        da.vstack((ramp_da, ones_da[None, :])),
    )
1595
+
1596
+
1597
def test_hstack():
    """``da.hstack`` matches ``np.hstack`` for 2-D and 1-D inputs."""
    ramp_np, ones_np = np.arange(5), np.ones(5)
    ramp_da, ones_da = da.arange(5, chunks=2), da.ones(5, chunks=2)

    assert_eq(
        np.hstack((ramp_np[None, :], ones_np[None, :])),
        da.hstack((ramp_da[None, :], ones_da[None, :])),
    )
    assert_eq(np.hstack((ramp_np, ones_np)), da.hstack((ramp_da, ones_da)))
1605
+
1606
+
1607
def test_dstack():
    """``da.dstack`` matches ``np.dstack`` for 3-D, 2-D and 1-D inputs."""
    ramp_np, ones_np = np.arange(5), np.ones(5)
    ramp_da, ones_da = da.arange(5, chunks=2), da.ones(5, chunks=2)

    # Index expressions that lift the 1-D inputs to 3-D and 2-D.
    for lift in ((None, None, slice(None)), (None, slice(None))):
        assert_eq(
            np.dstack((ramp_np[lift], ones_np[lift])),
            da.dstack((ramp_da[lift], ones_da[lift])),
        )
    assert_eq(np.dstack((ramp_np, ones_np)), da.dstack((ramp_da, ones_da)))
1619
+
1620
+
1621
@pytest.mark.parametrize(
    "np_func,dsk_func,nan_chunk",
    [(np.hstack, da.hstack, 0), (np.dstack, da.dstack, 1), (np.vstack, da.vstack, 2)],
)
def test_stack_unknown_chunk_sizes(np_func, dsk_func, nan_chunk):
    """Stacking arrays with unknown chunk sizes needs ``allow_unknown_chunksizes=True``."""
    shape = (100, 100, 100)
    known_da = da.ones(shape, chunks=(50, 50, 50))
    reference_np = np.ones(shape)

    # Use boolean indexing to create unknown chunks on axis 0
    mask = da.ones(100, chunks=50) > 0.5  # Always true but dask doesn't know
    unknown_da = known_da[mask]  # Shape: (nan, 100, 100), unknown on axis 0

    # Move unknown chunks to the correct axis for each function:
    # - hstack (concat axis=1): needs unknown on axis 0 or 2 -> axis 0 works
    # - dstack (concat axis=2): needs unknown on axis 0 or 1 -> axis 0 works
    # - vstack (concat axis=0): needs unknown on axis 1 or 2 -> transpose to axis 1
    if nan_chunk == 2:  # vstack needs unknown on non-0 axis
        # Transpose to move unknown from axis 0 to axis 1: (nan, 100, 100) -> (100, nan, 100)
        operand_da = unknown_da.transpose(1, 0, 2)
        reference_np = reference_np.transpose(1, 0, 2)
    else:
        operand_da = unknown_da

    # Without the opt-in flag the stack call must fail.
    with pytest.raises(ValueError):
        dsk_func((operand_da, operand_da))

    expected = np_func((reference_np, reference_np))
    stacked = dsk_func((operand_da, operand_da), allow_unknown_chunksizes=True)
    assert_eq(expected, stacked)
1651
+
1652
+
1653
def test_take():
    """Check ``da.take`` against ``np.take``, an invalid axis, and key stability."""
    x = np.arange(400).reshape((20, 20))
    a = da.from_array(x, chunks=(5, 5))

    for indices, axis in [(3, 0), ([3, 4, 5], -1)]:
        assert_eq(np.take(x, indices, axis=axis), da.take(a, indices, axis=axis))

    # Axis 2 does not exist on a 2-D array.
    with pytest.raises(ValueError):
        da.take(a, 3, axis=2)

    # Two identical calls must compare equal under ``same_keys``.
    first = da.take(a, [3, 4, 5], axis=-1)
    second = da.take(a, [3, 4, 5], axis=-1)
    assert same_keys(first, second)
1664
+
1665
+
1666
@pytest.mark.skip(reason="hangs - lazy evaluation issue")
def test_take_large():
    """Take small index sets from a very large lazy range (currently skipped)."""
    huge = da.arange(1_000_000_000_000, chunks=(200_000_000,), dtype="int64")

    # For an arange starting at 0 the taken values equal the indices themselves.
    index_sets = (
        np.arange(20, dtype="int64"),
        np.arange(50, 300, dtype="int64"),
    )
    for indices in index_sets:
        assert_eq(da.take(huge, indices, axis=0), indices)
1675
+
1676
+
1677
def test_take_dask_from_numpy():
    """Taking from a NumPy array with dask indices keeps the indices' chunks."""
    source = np.arange(5).astype("f8")
    indices = da.from_array(np.array([1, 2, 3, 3, 2, 1]), chunks=3)

    taken = da.take(source * 2, indices)

    assert taken.chunks == indices.chunks
    assert_eq(taken, np.array([2.0, 4.0, 6.0, 6.0, 4.0, 2.0]))
1685
+
1686
+
1687
def test_compress():
    """Check ``da.compress`` against ``np.compress`` for NumPy, list and dask conditions."""
    x = np.arange(25).reshape((5, 5))
    a = da.from_array(x, chunks=(2, 2))

    c1 = np.array([True, False, True, False, True])
    c2 = np.array([True, False])
    c3 = [True, False]
    dc1 = da.from_array(c1, chunks=3)
    dc2 = da.from_array(c2, chunks=2)

    # (condition given to NumPy, condition given to dask)
    cases = [(c1, c1), (c2, c2), (c3, c3), (c1, dc1), (c2, dc2), (c3, dc2)]
    for np_cond, cond in cases:
        lazy_condition = isinstance(cond, da.Array)
        for axis in [None, 0, 1]:
            res = da.compress(cond, a, axis=axis)
            assert_eq(np.compress(np_cond, x, axis=axis), res)

            # With axis=None the (flattened) result has a single axis 0.
            checked_axis = axis or 0
            if lazy_condition:
                # A dask condition is only known after computing, so the
                # compressed axis must report nan shape and nan chunks.
                assert np.isnan(res.shape[checked_axis]).all()
                assert np.isnan(res.chunks[checked_axis]).all()
            else:
                # A concrete condition gives a known, non-nan compressed axis.
                assert np.count_nonzero(cond) == res.shape[checked_axis]
                assert not np.isnan(res.chunks[checked_axis]).any()

    with pytest.raises(ValueError):
        da.compress([True, False], a, axis=100)

    with pytest.raises(ValueError):
        da.compress([[True], [False]], a, axis=100)
1720
+
1721
+
1722
def test_extract():
    """Check ``da.extract`` against ``np.extract`` for NumPy and dask conditions."""
    x = np.arange(25).reshape((5, 5))
    a = da.from_array(x, chunks=(2, 2))

    c1 = np.array([True, False, True, False, True])
    c2 = np.array([[True, False], [True, False]])
    c3 = np.array([True, False])
    dc1 = da.from_array(c1, chunks=3)
    dc2 = da.from_array(c2, chunks=(2, 1))
    dc3 = da.from_array(c3, chunks=2)

    # (condition given to NumPy, condition given to dask)
    cases = [(c1, c1), (c2, c2), (c3, c3), (c1, dc1), (c2, dc2), (c3, dc3)]
    for np_cond, cond in cases:
        res = da.extract(cond, a)
        assert_eq(np.extract(np_cond, x), res)
        if not isinstance(cond, da.Array):
            continue
        # A dask condition leaves the output chunk sizes unknown.
        assert np.isnan(res.chunks[0]).all()
1738
+
1739
+
1740
def test_isnull():
    """``da.isnull`` / ``da.notnull`` agree with ``np.isnan`` and its negation."""
    values = np.array([1, np.nan])
    lazy = da.from_array(values, chunks=(2,))
    # NOTE(review): an ImportError anywhere in this block silently skips the
    # remaining assertions - presumably an optional dependency; confirm.
    with contextlib.suppress(ImportError):
        assert_eq(da.isnull(lazy), np.isnan(values))
        assert_eq(da.notnull(lazy), ~(np.isnan(values)))
1746
+
1747
+
1748
def test_isnull_result_is_an_array():
    """Computing ``da.isnull`` of a 0-d selection yields exactly ``np.ndarray``."""
    # regression test for https://github.com/dask/dask/issues/3822
    source = da.from_array(np.arange(3, dtype=np.int64), chunks=-1)
    with contextlib.suppress(ImportError):
        computed = da.isnull(source[0]).compute()
        assert type(computed) is np.ndarray
1754
+
1755
+
1756
def test_isclose():
    """``da.isclose`` matches ``np.isclose`` when NaNs are treated as equal."""
    left = np.array([0, np.nan, 1, 1.5])
    right = np.array([1e-9, np.nan, 1, 2])
    lazy_left = da.from_array(left, chunks=(2,))
    lazy_right = da.from_array(right, chunks=(2,))

    actual = da.isclose(lazy_left, lazy_right, equal_nan=True)
    expected = np.isclose(left, right, equal_nan=True)
    assert_eq(actual, expected)
1762
+
1763
+
1764
def test_allclose():
    """``da.allclose`` matches ``np.allclose`` when NaNs are treated as equal."""
    np_left = np.array([0, np.nan, 1, 1.5])
    np_right = np.array([1e-9, np.nan, 1, 2])

    da_left = da.from_array(np_left, chunks=(2,))
    da_right = da.from_array(np_right, chunks=(2,))

    expected = np.allclose(np_left, np_right, equal_nan=True)
    actual = da.allclose(da_left, da_right, equal_nan=True)

    # Wrap the Python bool in a 0-d array and index with () before comparing.
    assert_eq(np.array(expected)[()], actual)
1775
+
1776
+
1777
def test_choose():
    """Check ``choose`` as a function and as a method against NumPy."""
    x = np.random.default_rng().integers(10, size=(15, 16))
    d = da.from_array(x, chunks=(4, 5))

    # Builders for the two choice lists, applied to either the dask or the
    # NumPy array.
    choice_builders = (
        lambda arr: [0, arr],
        lambda arr: [-arr, arr],
    )

    # test choose function
    for build in choice_builders:
        assert_eq(da.choose(d > 5, build(d)), np.choose(x > 5, build(x)))

    # test choose method
    index_dask = d > 5
    index_numpy = x > 5
    for build in choice_builders:
        assert_eq(index_dask.choose(build(d)), index_numpy.choose(build(x)))

    # Choices supplied as a single array (a one-element tuple for NumPy).
    indices_np = np.array([0, 0, 0, 0])
    choices_np = (np.array([10.0, 20.0, 30.0, 40.0]),)
    indices_da = da.from_array(indices_np, chunks=(2,))
    choices_da = da.from_array(choices_np, chunks=(1, 2))

    assert_eq(np.choose(indices_np, choices_np), da.choose(indices_da, choices_da))
1797
+
1798
+
1799
def test_piecewise():
    """``da.piecewise`` matches ``np.piecewise`` with extra positional/keyword args."""
    rng = np.random.default_rng(1337)

    x = rng.integers(10, size=(15, 16))
    d = da.from_array(x, chunks=(4, 5))

    # One callable (which receives the extra args ``1`` and ``k=2``) and one
    # constant, matching the two conditions.
    funclist = [lambda e, v, k: e + 1, 5]

    expected = np.piecewise(x, [x < 5, x >= 5], funclist, 1, k=2)
    actual = da.piecewise(d, [d < 5, d >= 5], funclist, 1, k=2)
    assert_eq(expected, actual)
1809
+
1810
+
1811
def test_piecewise_otherwise():
    """``da.piecewise`` matches NumPy when a third, default function is given."""
    rng = np.random.default_rng(1337)

    x = rng.integers(10, size=(15, 16))
    d = da.from_array(x, chunks=(4, 5))

    # Two conditions but three functions: the last one is the "otherwise" case.
    funclist = [
        lambda e, v, k: e + 1,
        lambda e, v, k: v * e,
        lambda e, v, k: 0,
    ]

    expected = np.piecewise(x, [x > 5, x <= 2], funclist, 1, k=2)
    actual = da.piecewise(d, [d > 5, d <= 2], funclist, 1, k=2)
    assert_eq(expected, actual)
1833
+
1834
+
1835
def test_select():
    """``da.select`` matches ``np.select`` for stacked conditions and choices."""
    condition_rows = (
        [False, False, False, False],
        [False, True, False, True],
        [False, False, True, True],
    )
    choice_rows = (
        [1, 2, 3, 4],
        [5, 6, 7, 8],
        [9, 10, 11, 12],
    )
    conditions = [np.array(row) for row in condition_rows]
    choices = [np.array(row) for row in choice_rows]

    d_conditions = da.from_array(conditions, chunks=(3, 2))
    d_choices = da.from_array(choices)

    assert_eq(np.select(conditions, choices), da.select(d_conditions, d_choices))
1849
+
1850
+
1851
def test_select_multidimension():
    """``da.select`` on 3-D inputs returns a dask array equal to NumPy's result."""
    data = np.random.default_rng().random((100, 50, 2))
    lazy = da.from_array(data, chunks=(50, 50, 1))

    expected = np.select(
        [data < 0, data > 2, data > 1],
        [data, data * 2, data * 3],
        default=1,
    )
    actual = da.select(
        [lazy < 0, lazy > 2, lazy > 1],
        [lazy, lazy * 2, lazy * 3],
        default=1,
    )

    assert isinstance(actual, da.Array)
    assert_eq(actual, expected)
1858
+
1859
+
1860
def test_select_return_dtype():
    """``da.select`` agrees with ``np.select`` on float data containing a NaN."""
    values = np.array([1, 2, 3, np.nan, 5, 7])
    nan_mask = np.isnan(values)
    lazy_values = da.from_array(values)
    lazy_mask = da.isnan(lazy_values)

    expected = np.select([nan_mask], [values])
    actual = da.select([lazy_mask], [lazy_values])
    assert_eq(expected, actual, equal_nan=True)
1866
+
1867
+
1868
@pytest.mark.xfail(reason="broadcasting in da.select() not implemented yet")
def test_select_broadcasting():
    """Expected failure: ``da.select`` should broadcast like ``np.select``."""
    conditions = [np.array(True), np.array([False, True, False])]
    choices = [1, np.arange(12).reshape(4, 3)]
    d_conditions = da.from_array(conditions)
    d_choices = da.from_array(choices)

    expected = np.select(conditions, choices)
    assert_eq(expected, da.select(d_conditions, d_choices))

    # default can broadcast too:
    expected_default = np.select([True], [0], default=[0])
    assert_eq(expected_default, da.select([True], [0], default=[0]))
1877
+
1878
+
1879
def test_argwhere():
    """``da.argwhere`` matches ``np.argwhere`` for empty and non-empty arrays."""
    cases = [(0, ()), ((0, 0), (0, 0)), ((15, 16), (4, 5))]
    for shape, chunks in cases:
        data = np.random.default_rng().integers(10, size=shape)
        lazy = da.from_array(data, chunks=chunks)

        expected = np.argwhere(data)
        actual = da.argwhere(lazy)

        assert_eq(actual, expected)
1888
+
1889
+
1890
def test_argwhere_obj():
    """``da.argwhere`` matches ``np.argwhere`` on an object-dtype array."""
    data = np.random.default_rng().integers(10, size=(15, 16)).astype(object)
    lazy = da.from_array(data, chunks=(4, 5))

    expected = np.argwhere(data)
    actual = da.argwhere(lazy)

    assert_eq(actual, expected)
1898
+
1899
+
1900
def test_argwhere_str():
    """``da.argwhere`` matches ``np.argwhere`` on an array of single characters."""
    # We may have behavior differences with NumPy for strings
    # with just spaces, depending on the version of NumPy.
    # https://github.com/numpy/numpy/issues/9875
    chars = np.array(list("Hello world"))
    lazy = da.from_array(chars, chunks=(4,))

    expected = np.argwhere(chars)
    actual = da.argwhere(lazy)

    assert_eq(actual, expected)
1911
+
1912
+
1913
def test_where():
    """Three-argument ``da.where`` matches ``np.where`` for array and scalar conditions."""
    rng = np.random.default_rng()
    x = rng.integers(10, size=(15, 14))
    # Ensure some false elements
    x[5, 5] = 0
    x[4, 4] = 0
    d = da.from_array(x, chunks=(4, 5))
    y = rng.integers(10, size=15).astype(np.uint8)
    e = da.from_array(y, chunks=(4,))

    # (dask-side condition, NumPy-side condition)
    condition_pairs = [(d > 5, x > 5), (d, x)]
    condition_pairs += [
        (scalar, scalar)
        for scalar in (1, 0, 5, True, np.True_, False, np.False_)
    ]

    # (dask-side fallback, NumPy-side fallback): a scalar, a column that
    # broadcasts across rows, and a row that broadcasts across columns.
    fallback_pairs = [(0, 0), (-e[:, None], -y[:, None]), (e[:14], y[:14])]

    for da_cond, np_cond in condition_pairs:
        for da_other, np_other in fallback_pairs:
            actual = da.where(da_cond, d, da_other)
            expected = np.where(np_cond, x, np_other)
            assert_eq(actual, expected)
1936
+
1937
+
1938
def test_where_scalar_dtype():
    """``da.where`` with a NumPy scalar operand matches ``np.where``."""
    scalar = np.int32(3)
    np_values = np.array([4, 5, 6], dtype=np.int16)
    np_cond = np.array([1, 0, 1])
    da_values = da.from_array(np_values, chunks=2)
    da_cond = da.from_array(np_cond, chunks=2)

    assert_eq(np.where(np_cond, scalar, np_values), da.where(da_cond, scalar, da_values))

    # Test again for the bool optimization
    assert_eq(np.where(True, scalar, np_values), da.where(True, scalar, np_values))
1951
+
1952
+
1953
def test_where_bool_optimization():
    """A scalar condition makes ``da.where`` return one of its operands unchanged."""
    rng = np.random.default_rng()
    x = rng.integers(10, size=(15, 16))
    d = da.from_array(x, chunks=(4, 5))
    y = rng.integers(10, size=(15, 16))
    e = da.from_array(y, chunks=(4, 5))

    for cond in [True, False, np.True_, np.False_, 1, 0]:
        actual = da.where(cond, d, e)
        expected = np.where(cond, x, y)

        assert_eq(actual, expected)

        # The very same object must come back: the first operand for a truthy
        # condition, the second otherwise.
        picked = d if cond else e

        assert actual is picked
1969
+
1970
+
1971
def test_where_nonzero():
    """One-argument ``da.where`` matches ``np.where`` element for element."""
    cases = [(0, ()), ((0, 0), (0, 0)), ((15, 16), (4, 5))]
    for shape, chunks in cases:
        data = np.random.default_rng().integers(10, size=shape)
        lazy = da.from_array(data, chunks=chunks)

        expected = np.where(data)
        actual = da.where(lazy)

        # Same container type and the same number of index arrays.
        assert isinstance(actual, type(expected))
        assert len(actual) == len(expected)

        for actual_part, expected_part in zip(actual, expected):
            assert_eq(actual_part, expected_part)
1984
+
1985
+
1986
def test_where_incorrect_args():
    """``da.where`` must reject a call that supplies only one of ``x`` and ``y``.

    The error must be a ``ValueError`` whose message contains
    "either both or neither of x and y should be given".
    """
    a = da.ones(5, chunks=3)

    for kwd in ["x", "y"]:
        kwargs = {kwd: a}
        # ``pytest.raises`` fails the test when nothing is raised. A bare
        # try/except would pass silently in that case and check nothing.
        with pytest.raises(
            ValueError,
            match="either both or neither of x and y should be given",
        ):
            da.where(a > 0, **kwargs)
1995
+
1996
+
1997
def test_count_nonzero():
    """``da.count_nonzero`` matches ``np.count_nonzero`` with no axis given."""
    cases = [(0, ()), ((0, 0), (0, 0)), ((15, 16), (4, 5))]
    for shape, chunks in cases:
        data = np.random.default_rng().integers(10, size=shape)
        lazy = da.from_array(data, chunks=chunks)

        expected = np.count_nonzero(data)
        actual = da.count_nonzero(lazy)

        if actual.shape != ():
            assert_eq(expected, actual)
        else:
            # 0-d result: compare the computed value directly.
            assert expected == actual.compute()
2009
+
2010
+
2011
@pytest.mark.parametrize("axis", [None, 0, (1,), (0, 1)])
def test_count_nonzero_axis(axis):
    """``da.count_nonzero`` matches NumPy for each parametrized ``axis``."""
    cases = [((0, 0), (0, 0)), ((15, 16), (4, 5))]
    for shape, chunks in cases:
        data = np.random.default_rng().integers(10, size=shape)
        lazy = da.from_array(data, chunks=chunks)

        expected = np.count_nonzero(data, axis)
        actual = da.count_nonzero(lazy, axis)

        if actual.shape != ():
            assert_eq(expected, actual)
        else:
            # 0-d result: compare the computed value directly.
            assert expected == actual.compute()
2024
+
2025
+
2026
def test_count_nonzero_obj():
    """``da.count_nonzero`` matches NumPy on an object-dtype array."""
    data = np.random.default_rng().integers(10, size=(15, 16)).astype(object)
    lazy = da.from_array(data, chunks=(4, 5))

    expected = np.count_nonzero(data)
    actual = da.count_nonzero(lazy)

    if actual.shape != ():
        assert_eq(expected, actual)
    else:
        # 0-d result: compare the computed value directly.
        assert expected == actual.compute()
2037
+
2038
+
2039
@pytest.mark.parametrize("axis", [None, 0, (1,), (0, 1)])
def test_count_nonzero_obj_axis(axis):
    """``da.count_nonzero`` matches NumPy on object arrays for each ``axis``."""
    data = np.random.default_rng().integers(10, size=(15, 16)).astype(object)
    lazy = da.from_array(data, chunks=(4, 5))

    expected = np.count_nonzero(data, axis)
    actual = da.count_nonzero(lazy, axis)

    if actual.shape != ():
        #######################################################
        # Workaround oddness with Windows and object arrays.  #
        #                                                     #
        # xref: https://github.com/numpy/numpy/issues/9468    #
        #######################################################
        assert_eq(expected.astype(np.intp), actual)
    else:
        # 0-d result: compare the computed value directly.
        assert expected == actual.compute()
2056
+
2057
+
2058
def test_count_nonzero_str():
    """``da.count_nonzero`` matches NumPy on an array of single characters."""
    # We may have behavior differences with NumPy for strings
    # with just spaces, depending on the version of NumPy.
    # https://github.com/numpy/numpy/issues/9875
    chars = np.array(list("Hello world"))
    lazy = da.from_array(chars, chunks=(4,))

    expected = np.count_nonzero(chars)
    actual = da.count_nonzero(lazy)

    assert expected == actual.compute()
2069
+
2070
+
2071
def test_flatnonzero():
    """``da.flatnonzero`` matches ``np.flatnonzero`` for empty and non-empty arrays."""
    cases = [(0, ()), ((0, 0), (0, 0)), ((15, 16), (4, 5))]
    for shape, chunks in cases:
        data = np.random.default_rng().integers(10, size=shape)
        lazy = da.from_array(data, chunks=chunks)

        expected = np.flatnonzero(data)
        actual = da.flatnonzero(lazy)

        assert_eq(actual, expected)
2080
+
2081
+
2082
def test_nonzero():
    """``da.nonzero`` matches ``np.nonzero`` element for element."""
    cases = [(0, ()), ((0, 0), (0, 0)), ((15, 16), (4, 5))]
    for shape, chunks in cases:
        data = np.random.default_rng().integers(10, size=shape)
        lazy = da.from_array(data, chunks=chunks)

        expected = np.nonzero(data)
        actual = da.nonzero(lazy)

        # Same container type and the same number of index arrays.
        assert isinstance(actual, type(expected))
        assert len(actual) == len(expected)

        for actual_part, expected_part in zip(actual, expected):
            assert_eq(actual_part, expected_part)
2095
+
2096
+
2097
def test_nonzero_method():
    """The ``.nonzero()`` method on a dask array matches the ndarray method."""
    cases = [(0, ()), ((0, 0), (0, 0)), ((15, 16), (4, 5))]
    for shape, chunks in cases:
        data = np.random.default_rng().integers(10, size=shape)
        lazy = da.from_array(data, chunks=chunks)

        expected = data.nonzero()
        actual = lazy.nonzero()

        # Same container type and the same number of index arrays.
        assert isinstance(actual, type(expected))
        assert len(actual) == len(expected)

        for actual_part, expected_part in zip(actual, expected):
            assert_eq(actual_part, expected_part)
2110
+
2111
+
2112
def test_unravel_index_empty():
    """Unravelling into an empty shape gives an empty result of NumPy's type."""
    shape = ()
    flat = np.array(0, dtype=int)
    lazy_flat = da.from_array(flat, chunks=1)

    expected = np.unravel_index(flat, shape)
    actual = da.unravel_index(lazy_flat, shape)

    assert isinstance(actual, type(expected))
    assert len(actual) == len(expected) == 0
2122
+
2123
+
2124
def test_unravel_index():
    """``da.unravel_index`` matches NumPy and its output can drive ``vindex``."""
    rng = np.random.default_rng()
    # (number of flat indices, target shape, memory order)
    cases = [
        (0, (15,), "C"),
        (1, (15,), "C"),
        (3, (15,), "C"),
        (3, (15,), "F"),
        (2, (15, 16), "C"),
        (2, (15, 16), "F"),
    ]
    for nindices, shape, order in cases:
        arr = rng.random(shape)
        darr = da.from_array(arr, chunks=1)

        flat = rng.integers(np.prod(shape, dtype=int), size=nindices)
        lazy_flat = da.from_array(flat, chunks=1)

        expected = np.unravel_index(flat, shape, order)
        actual = da.unravel_index(lazy_flat, shape, order)

        assert isinstance(actual, type(expected))
        assert len(actual) == len(expected)

        for actual_part, expected_part in zip(actual, expected):
            assert_eq(actual_part, expected_part)

        # The computed dask indices must select the same elements as the
        # NumPy indices do.
        assert_eq(darr.vindex[dask.compute(*actual)], arr[expected])
2150
+
2151
+
2152
@pytest.mark.parametrize(
    "asarray",
    [
        lambda x: x,
        lambda x: [np.asarray(a) for a in x],
        lambda x: [da.asarray(a) for a in x],
        np.asarray,
        da.from_array,
    ],
)
@pytest.mark.parametrize(
    "arr, chunks, kwargs",
    [
        # Numpy doctests:
        ([[3, 6, 6], [4, 5, 1]], (2, 3), dict(dims=(7, 6), order="C")),
        ([[3, 6, 6], [4, 5, 1]], (2, 1), dict(dims=(7, 6), order="F")),
        ([[3, 6, 6], [4, 5, 1]], 1, dict(dims=(4, 6), mode="clip")),
        ([[3, 6, 6], [4, 5, 1]], (2, 3), dict(dims=(4, 4), mode=("clip", "wrap"))),
        # Shape tests:
        ([[3, 6, 6]], (1, 1), dict(dims=(7), order="C")),
        ([[3, 6, 6], [4, 5, 1], [8, 6, 2]], (3, 1), dict(dims=(7, 6, 9), order="C")),
        # Multi-dimensional index arrays
        (
            np.arange(6).reshape(3, 2, 1).tolist(),
            (1, 2, 1),
            dict(dims=(7, 6, 9), order="C"),
        ),
        # Broadcasting index arrays
        ([1, [2, 3]], None, dict(dims=(8, 9))),
        ([1, [2, 3], [[1, 2], [3, 4], [5, 6], [7, 8]]], None, dict(dims=(8, 9, 10))),
    ],
)
def test_ravel_multi_index(asarray, arr, chunks, kwargs):
    """``da.ravel_multi_index`` matches NumPy across input container kinds.

    ``asarray`` converts the raw nested-list index into the form under test;
    ``chunks`` is only used when that form is a single dask array.
    """
    # Inputs that mix scalars and sequences cannot be turned into one array,
    # so the whole-array converters are skipped for them.
    has_scalar_entry = any(np.isscalar(entry) for entry in arr)
    if has_scalar_entry and asarray in (np.asarray, da.from_array):
        pytest.skip()

    if asarray is da.from_array:
        arr = np.asarray(arr)
        dask_arg = da.from_array(arr, chunks=chunks)
    else:
        arr = asarray(arr)
        dask_arg = arr

    expected = np.ravel_multi_index(arr, **kwargs)
    actual = da.ravel_multi_index(dask_arg, **kwargs)
    assert_eq(expected, actual)
2198
+
2199
+
2200
def test_ravel_multi_index_unknown_shape():
    """``da.ravel_multi_index`` works on a multi-index with an unknown shape."""
    lazy_index = da.from_array([[3, 6, 6], [4, 5, 1], [-1, -1, -1]])
    # Boolean row filtering leaves the row count unknown until computed.
    keep_rows = (lazy_index > 0).all(axis=1)
    lazy_index = lazy_index[keep_rows]

    concrete_index = lazy_index.compute()

    assert np.isnan(lazy_index.shape).any()
    expected = np.ravel_multi_index(concrete_index, dims=(7, 6))
    actual = da.ravel_multi_index(lazy_index, dims=(7, 6))
    assert_eq(expected, actual)
2211
+
2212
+
2213
def test_ravel_multi_index_unknown_shape_fails():
    """A multi-index list with unknown-shape entries raises a chunk ``ValueError``."""
    first = da.from_array([2, -1, 3, -1], chunks=2)
    first = first[first > 0]

    second = da.from_array(
        [[1, 2], [-1, -1], [3, 4], [5, 6], [7, 8], [-1, -1]],
        chunks=(2, 1),
    )
    second = second[(second > 0).all(axis=1)]

    multi_index = [1, first, second]

    # Both filtered arrays have an unknown shape.
    for filtered in (first, second):
        assert np.isnan(filtered.shape).any()

    with pytest.raises(ValueError, match="[Cc]hunk"):
        da.ravel_multi_index(multi_index, dims=(8, 9, 10))
2226
+
2227
+
2228
@pytest.mark.parametrize("dims", [da.from_array([5, 10]), delayed([5, 10], nout=2)])
@pytest.mark.parametrize("wrap_in_list", [False, True])
def test_ravel_multi_index_delayed_dims(dims, wrap_in_list):
    """Dask-typed ``dims`` are rejected with ``NotImplementedError``."""
    with pytest.raises(NotImplementedError, match="Dask types are not supported"):
        # The element access stays inside the ``raises`` block so that it is
        # covered by the same expectation as the call itself.
        if wrap_in_list:
            dims = [dims[0], dims[1]]
        da.ravel_multi_index((2, 1), dims)
2233
+
2234
+
2235
def test_ravel_multi_index_non_int_dtype():
    """A float entry in the multi-index raises ``TypeError``."""
    multi_index = (1.0, 2)
    dims = (5, 10)
    with pytest.raises(TypeError, match="only int indices permitted"):
        da.ravel_multi_index(multi_index, dims)
2241
+
2242
+
2243
def test_coarsen():
    """``da.coarsen`` matches the chunk-level ``da.chunk.coarsen`` on NumPy data."""
    x = np.random.default_rng().integers(10, size=(24, 24))
    d = da.from_array(x, chunks=(4, 8))

    def windows():
        # A fresh mapping per call, so no call can observe another's changes.
        return {0: 2, 1: 4}

    # The dask-level reduction may be given as the NumPy or the dask function.
    for reduction in (np.sum, da.sum):
        assert_eq(
            da.chunk.coarsen(np.sum, x, windows()),
            da.coarsen(reduction, d, windows()),
        )

    assert_eq(
        da.chunk.coarsen(np.mean, x, windows(), dtype="float32"),
        da.coarsen(da.mean, d, windows(), dtype="float32"),
    )
2253
+
2254
+
2255
def test_coarsen_with_excess():
    """``trim_excess=True`` drops trailing elements that do not fill a window."""
    x = da.arange(10, chunks=5)

    # Windows of 5 divide the 10 elements exactly.
    minima = da.coarsen(np.min, x, {0: 5}, trim_excess=True)
    assert_eq(minima, np.array([0, 5]))

    # Windows of 3 leave element 9 over; it is not part of any sum.
    sums = da.coarsen(np.sum, x, {0: 3}, trim_excess=True)
    assert_eq(sums, np.array([0 + 1 + 2, 3 + 4 + 5, 6 + 7 + 8]))
2262
+
2263
+
2264
@pytest.mark.parametrize("chunks", [(x,) * 3 for x in range(16, 32)])
def test_coarsen_bad_chunks(chunks):
    """Coarsening gives the same result whatever the input chunking is."""
    regular = da.arange(np.sum(chunks), chunks=5)
    rechunked = regular.rechunk(tuple(chunks))

    from_regular = da.coarsen(np.sum, regular, {0: 10}, trim_excess=True)
    from_rechunked = da.coarsen(np.sum, rechunked, {0: 10}, trim_excess=True)
    assert_eq(from_regular, from_rechunked)
2272
+
2273
+
2274
@pytest.mark.parametrize(
    "chunks, divisor",
    [
        ((1, 1), 1),
        ((1, 1), 2),
        ((1, 1, 1), 2),
        ((10, 1), 10),
        ((20, 10, 15, 23, 24), 10),
        ((20, 10, 15, 23, 24), 8),
        ((10, 20, 30, 40, 2), 10),
        ((20, 10, 15, 42, 23, 24), 16),
        ((20, 10, 15, 47, 23, 24), 10),
        ((2, 10, 15, 47, 23, 24), 4),
    ],
)
def test_aligned_coarsen_chunks(chunks, divisor):
    """Check the invariants of ``aligned_coarsen_chunks`` for each case."""
    from dask_array.routines import aligned_coarsen_chunks as acc

    aligned_chunks = acc(chunks, divisor)
    has_remainder = (np.array(aligned_chunks) % divisor) != 0
    already_aligned = np.where((np.array(chunks) % divisor) == 0)[0]

    # check that total number of elements is conserved
    assert sum(aligned_chunks) == sum(chunks)

    # check that valid chunks are not modified
    before = [chunks[idx] for idx in already_aligned]
    after = [aligned_chunks[idx] for idx in already_aligned]
    assert before == after

    # check that no chunks are 0
    assert (np.array(aligned_chunks) > 0).all()

    # check that at most one chunk was added
    assert len(aligned_chunks) <= len(chunks) + 1

    # check that either 0 or 1 chunks are not divisible by divisor
    assert has_remainder.sum() in (0, 1)

    # check that the only indivisible chunk is the last
    if has_remainder.sum() == 1:
        assert has_remainder[-1] == 1
2309
+
2310
+
2311
def test_insert():
    """Check ``da.insert`` against ``np.insert`` and its error cases."""
    rng = np.random.default_rng()
    x = rng.integers(10, size=(10, 10))
    a = da.from_array(x, chunks=(5, 5))
    y = rng.integers(10, size=(5, 10))
    b = da.from_array(y, chunks=(4, 4))

    # Scalar value -1 inserted at (position spec, axis).
    scalar_cases = [
        (0, 0),
        (3, -1),
        (5, 1),
        (-1, -2),
        ([2, 3, 3], 1),
        ([2, 3, 8, 8, -2, -2], 0),
        (slice(1, 4), 1),
    ]
    for obj, axis in scalar_cases:
        assert_eq(np.insert(x, obj, -1, axis=axis), da.insert(a, obj, -1, axis=axis))

    # Array-valued insertions.
    repeated = [2] * 3 + [5] * 2
    assert_eq(np.insert(x, repeated, y, axis=0), da.insert(a, repeated, b, axis=0))
    assert_eq(np.insert(x, 0, y[0], axis=1), da.insert(a, 0, b[0], axis=1))

    # Two identical calls must compare equal under ``same_keys``.
    first = da.insert(a, [2, 3, 8, 8, -2, -2], -1, axis=0)
    second = da.insert(a, [2, 3, 8, 8, -2, -2], -1, axis=0)
    assert same_keys(first, second)

    # Unsorted positions are expected to be unsupported.
    with pytest.raises(NotImplementedError):
        da.insert(a, [4, 2], -1, axis=0)

    # Axes outside the 2-D range.
    for bad_axis in (2, -3):
        with pytest.raises(AxisError):
            da.insert(a, [3], -1, axis=bad_axis)
2347
+
2348
+
2349
def test_append():
    """Check ``da.append`` against ``np.append`` and its error cases."""
    rng = np.random.default_rng()
    x = rng.integers(10, size=(10, 10))
    a = da.from_array(x, chunks=(5, 5))

    # appendage for axis 1 / -1
    y1 = rng.integers(10, size=(10, 5))
    b1 = da.from_array(y1, chunks=(4, 4))

    # appendage for axis 0 / -2
    y0 = rng.integers(10, size=(5, 10))
    b0 = da.from_array(y0, chunks=(4, 4))

    # test axis None
    for np_values, da_values in [(x, a), (y0, b0), (y1, b1)]:
        assert_eq(
            np.append(x, np_values, axis=None),
            da.append(a, da_values, axis=None),
        )

    # test axis 0 / -2
    for axis in (0, -2):
        assert_eq(np.append(x, y0, axis=axis), da.append(a, b0, axis=axis))

    # test axis 1 / -1
    for axis in (1, -1):
        assert_eq(np.append(x, y1, axis=axis), da.append(a, b1, axis=axis))

    # test --> treat values as array_likes
    zeros = ((0,) * 10,) * 10
    for axis in (None, 0, 1):
        assert_eq(np.append(x, zeros, axis=axis), da.append(a, zeros, axis=axis))

    # check AxisError
    for bad_axis in (2, -3):
        with pytest.raises(AxisError):
            da.append(a, zeros, axis=bad_axis)

    # check ValueError if dimensions don't align
    with pytest.raises(ValueError):
        da.append(a, (0,) * 10, axis=0)
2392
+
2393
+
2394
def test_multi_insert():
    """Chained insertions along two axes match the same NumPy chain."""
    z = np.random.default_rng().integers(10, size=(1, 2))
    c = da.from_array(z, chunks=(1, 2))

    np_rows = np.insert(z, [0, 1], -1, axis=0)
    expected = np.insert(np_rows, [1], -1, axis=1)

    da_rows = da.insert(c, [0, 1], -1, axis=0)
    actual = da.insert(da_rows, [1], -1, axis=1)

    assert_eq(expected, actual)
2401
+
2402
+
2403
def test_delete():
    """Check ``da.delete`` against ``np.delete`` and its error cases.

    Every comparison uses the NumPy array ``x`` on the reference side and the
    dask array ``a`` (built from ``x``) on the side under test.
    """
    x = np.random.default_rng().integers(10, size=(10, 10))
    a = da.from_array(x, chunks=(5, 5))

    assert_eq(np.delete(x, 0, axis=0), da.delete(a, 0, axis=0))
    assert_eq(np.delete(x, 3, axis=-1), da.delete(a, 3, axis=-1))
    assert_eq(np.delete(x, 5, axis=1), da.delete(a, 5, axis=1))
    assert_eq(np.delete(x, -1, axis=-2), da.delete(a, -1, axis=-2))
    assert_eq(np.delete(x, [2, 3, 3], axis=1), da.delete(a, [2, 3, 3], axis=1))
    assert_eq(
        np.delete(x, [2, 3, 8, 8], axis=0),
        da.delete(a, [2, 3, 8, 8], axis=0),
    )
    assert_eq(np.delete(x, slice(1, 4), axis=1), da.delete(a, slice(1, 4), axis=1))
    assert_eq(np.delete(x, slice(1, 10, -1), axis=1), da.delete(a, slice(1, 10, -1), axis=1))

    # Unsorted indices. The reference is computed from the NumPy array ``x``.
    # Passing the dask array ``a`` to ``np.delete`` here would not give an
    # independent NumPy result to compare against.
    assert_eq(np.delete(x, [4, 2], axis=0), da.delete(a, [4, 2], axis=0))

    with pytest.raises(AxisError):
        da.delete(a, [3], axis=2)

    with pytest.raises(AxisError):
        da.delete(a, [3], axis=-3)
2426
+
2427
+
2428
def test_result_type():
    """Pin the dtypes ``da.result_type`` reports for arrays, dtypes and scalars."""
    f32 = da.from_array(np.ones(5, np.float32), chunks=(3,))
    i16 = da.from_array(np.ones(5, np.int16), chunks=(3,))
    i64 = da.from_array(np.ones(5, np.int64), chunks=(3,))
    np_f32 = np.ones(5, np.float32)

    assert da.result_type(i16, i64) == np.int64
    assert da.result_type(f32, i16, i64) == np.float64
    assert da.result_type(i16, np.float32) == np.float32
    assert da.result_type(i16, np.dtype(np.float32)) == np.float32
    assert da.result_type(i16, np_f32) == np.float32

    # Effect of scalars depends on their value
    assert da.result_type(1, i16) == np.int16
    assert da.result_type(1.0, f32) == np.float32

    # The expected results for NumPy scalars, 0-d arrays and large Python
    # floats differ between the two NumPy version branches.
    if NUMPY_GE_200:
        np_scalar_result = np.int64
        zero_d_result = np.int64
        big_float_result = np.float32
    else:
        np_scalar_result = np.int16
        zero_d_result = np.int16  # 0d array
        big_float_result = np.float64  # 1e200 is too big for float32
    assert da.result_type(np.int64(1), i16) == np_scalar_result
    assert da.result_type(np.ones((), np.int64), i16) == zero_d_result
    assert da.result_type(1e200, f32) == big_float_result

    # dask 0d-arrays are NOT treated like scalars
    zero_d = da.from_array(np.ones((), np.float64), chunks=())
    assert da.result_type(f32, zero_d) == np.float64
2453
+
2454
+
2455
def _numpy_and_dask_inputs(input_sigs):
    """Build matching random NumPy and dask operands for einsum signatures.

    Parameters
    ----------
    input_sigs : iterable of str
        One label string per operand. Every character must be a key of the
        tables below ("*" stands in for an ellipsis).

    Returns
    -------
    tuple of (list, list)
        The NumPy arrays and the dask arrays wrapping them, in input order.
    """
    # einsum label dimensions
    label_sizes = {
        "a": 5,
        "b": 6,
        "c": 7,
        "d": 5,
        "e": 6,
        "f": 10,
        "g": 1,
        "h": 2,
        "*": 11,
    }

    # dimension chunks sizes
    label_chunks = {
        "a": (2, 3),
        "b": (2, 3, 1),
        "c": (2, 3, 2),
        "d": (4, 1),
        "e": (2, 4),
        "f": (1, 2, 3, 4),
        "g": 1,
        "h": (1, 1),
        "*": 11,
    }

    shapes = [tuple(label_sizes[label] for label in sig) for sig in input_sigs]
    chunks = [tuple(label_chunks[label] for label in sig) for sig in input_sigs]

    # A fresh, unseeded generator is used for every operand.
    np_inputs = [np.random.default_rng().random(shape) for shape in shapes]
    da_inputs = [
        da.from_array(operand, chunks=operand_chunks)
        for operand, operand_chunks in zip(np_inputs, chunks)
    ]

    return np_inputs, da_inputs
2495
+
2496
+
2497
@pytest.mark.parametrize(
    "einsum_signature",
    [
        "abc,bad->abcd",
        "abcdef,bcdfg->abcdeg",
        "ea,fb,abcd,gc,hd->efgh",
        "ab,b",
        "aa",
        "a,a->",
        "a,a->a",
        "a,a",
        "a,b",
        "a,b,c",
        "a",
        "ba,b",
        "ba,b->",
        "defab,fedbc->defac",
        "ab...,bc...->ac...",
        "a...a",
        "abc...->cba...",
        "...ab->...a",
        "a...a->a...",
        # Following 2 from # https://stackoverflow.com/a/19203475/1611416
        "...abc,...abcd->...d",
        "ab...,b->ab...",
        # https://github.com/dask/dask/pull/3412#discussion_r182413444
        "aa->a",
        "ab,ab,c->c",
        "aab,bc->ac",
        "aab,bcc->ac",
        "fdf,cdd,ccd,afe->ae",
        "fff,fae,bef,def->abd",
    ],
)
def test_einsum(einsum_signature):
    """``da.einsum`` matches ``np.einsum`` for each parametrized signature."""
    # Keep only the operand part and replace each ellipsis by the "*" label
    # that the input builder understands.
    operand_part = einsum_signature.split("->")[0]
    input_sigs = operand_part.replace("...", "*").split(",")

    np_inputs, da_inputs = _numpy_and_dask_inputs(input_sigs)

    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=da.PerformanceWarning)
        expected = np.einsum(einsum_signature, *np_inputs)
        actual = da.einsum(einsum_signature, *da_inputs)
        assert_eq(expected, actual)
2542
+
2543
+
2544
def test_einsum_chunksizes():
    """Pin the output chunking ``da.einsum`` produces for several inputs."""
    # Single-chunk trailing axes on both operands.
    lhs = da.random.random((1024, 8, 8, 8, 8), chunks=(256, 8, 8, 8, 8))
    rhs = da.random.random((1024, 8, 8, 8, 8), chunks=(256, 8, 8, 8, 8))
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=da.PerformanceWarning)
        result = da.einsum("aijkl,amnop->ijklmnop", lhs, rhs)
        assert result.chunks == ((4,) * 2,) * 8

    # One trailing axis per operand is already split into size-1 chunks.
    lhs = da.random.random((64, 8, 8, 8, 8), chunks=(32, 8, 1, 8, 8))
    rhs = da.random.random((64, 8, 8, 8, 8), chunks=(32, 8, 8, 1, 8))
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=da.PerformanceWarning)
        result = da.einsum("aijkl,amnop->ijklmnop", lhs, rhs)
        halves = (4,) * 2
        singles = (1,) * 8
        assert result.chunks == (
            halves,
            singles,
            halves,
            halves,
            halves,
            halves,
            singles,
            halves,
        )

    # Small concrete arrays, so the values can be compared as well.
    np_lhs = np.random.random((2, 4, 4))
    np_rhs = np.random.random((2, 4, 4))

    lhs = da.from_array(np_lhs, chunks=(1, 2, 2))
    rhs = da.from_array(np_rhs, chunks=(1, 2, 2))
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=da.PerformanceWarning)
        result = da.einsum("aij,amn->ijmn", lhs, rhs)
        assert result.chunks == ((1,) * 4,) * 4
        assert_eq(np.einsum("aij,amn->ijmn", np_lhs, np_rhs), result)

    # regression test for GH11627
    z = da.ones(shape=(40000, 2, 10, 2, 10), dtype=np.float64, chunksize=(40000, 1, 5, 2, 10))
    x = da.ones(shape=(2, 10, 10), dtype=np.float64, chunksize=(2, 10, 10))
    y = da.ones(shape=(2, 10, 10), dtype=np.float64, chunksize=(2, 10, 10))
    res = da.einsum("abcde,bfc,dfe->acef", z, x, y)
    assert res.numblocks == (1, 1, 1, 1)
2585
+
2586
+
2587
@pytest.mark.parametrize("optimize_opts", [(True, False), ("greedy", False), ("optimal", False)])
def test_einsum_optimize(optimize_opts):
    """Check that the ``optimize`` setting does not change ``da.einsum`` results.

    ``optimize_opts`` is a pair of ``optimize`` values. NumPy is evaluated with
    one and dask with the other, then the roles are swapped.
    """
    sig = "ea,fb,abcd,gc,hd->efgh"
    input_sigs = sig.split("->")[0].split(",")
    np_inputs, da_inputs = _numpy_and_dask_inputs(input_sigs)

    opt1, opt2 = optimize_opts

    # da.einsum is given the dask operands; previously it was handed the NumPy
    # arrays, which left ``da_inputs`` unused.  PerformanceWarning is silenced
    # the same way test_einsum does for dask operands.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=da.PerformanceWarning)
        assert_eq(
            np.einsum(sig, *np_inputs, optimize=opt1),
            da.einsum(sig, *da_inputs, optimize=opt2),
        )

        assert_eq(
            np.einsum(sig, *np_inputs, optimize=opt2),
            da.einsum(sig, *da_inputs, optimize=opt1),
        )
2604
+
2605
+
2606
@pytest.mark.parametrize("order", ["C", "F", "A", "K"])
def test_einsum_order(order):
    """Check that ``da.einsum`` matches ``np.einsum`` for each ``order`` value."""
    sig = "ea,fb,abcd,gc,hd->efgh"
    input_sigs = sig.split("->")[0].split(",")
    np_inputs, da_inputs = _numpy_and_dask_inputs(input_sigs)

    # da.einsum is given the dask operands; previously it was handed the NumPy
    # arrays, which left ``da_inputs`` unused.  PerformanceWarning is silenced
    # the same way test_einsum does for dask operands.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=da.PerformanceWarning)
        assert_eq(
            np.einsum(sig, *np_inputs, order=order),
            da.einsum(sig, *da_inputs, order=order),
        )
2613
+
2614
+
2615
@pytest.mark.parametrize("casting", ["no", "equiv", "safe", "same_kind", "unsafe"])
def test_einsum_casting(casting):
    """Check that ``da.einsum`` matches ``np.einsum`` for each ``casting`` rule."""
    sig = "ea,fb,abcd,gc,hd->efgh"
    input_sigs = sig.split("->")[0].split(",")
    np_inputs, da_inputs = _numpy_and_dask_inputs(input_sigs)

    # da.einsum is given the dask operands; previously it was handed the NumPy
    # arrays, which left ``da_inputs`` unused.  PerformanceWarning is silenced
    # the same way test_einsum does for dask operands.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=da.PerformanceWarning)
        assert_eq(
            np.einsum(sig, *np_inputs, casting=casting),
            da.einsum(sig, *da_inputs, casting=casting),
        )
2625
+
2626
+
2627
@pytest.mark.parametrize("split_every", [None, 2])
def test_einsum_split_every(split_every):
    """``da.einsum`` accepts ``split_every`` and still matches ``np.einsum``."""
    np_inputs, da_inputs = _numpy_and_dask_inputs("a")
    expected = np.einsum("a", *np_inputs)
    actual = da.einsum("a", *da_inputs, split_every=split_every)
    assert_eq(expected, actual)
2631
+
2632
+
2633
def test_einsum_invalid_args():
    """Unrecognised keyword arguments make ``da.einsum`` raise ``TypeError``."""
    _np_inputs, da_inputs = _numpy_and_dask_inputs("a")
    unknown_kwargs = {"foo": 1, "bar": 2}
    with pytest.raises(TypeError):
        da.einsum("a", *da_inputs, **unknown_kwargs)
2637
+
2638
+
2639
def test_einsum_broadcasting_contraction():
    """Contract operands whose shared ``i`` axis has length 1 in one and 10 in another."""
    gen = np.random.default_rng()
    np_a = gen.random((1, 5, 4))
    np_b = gen.random((4, 6))
    np_c = gen.random((5, 6))
    np_d = gen.random(10)

    da_a = da.from_array(np_a, chunks=(1, (2, 3), (2, 2)))
    da_b = da.from_array(np_b, chunks=((2, 2), (4, 2)))
    da_c = da.from_array(np_c, chunks=((2, 3), (4, 2)))
    da_d = da.from_array(np_d, chunks=((7, 3)))

    # Three-operand contraction with an implicit output.
    expected = np.einsum("ijk,kl,jl", np_a, np_b, np_c)
    partial = da.einsum("ijk,kl,jl", da_a, da_b, da_c)
    assert_eq(expected, partial)

    # Multiplying the partial result by ``d`` is compared below against
    # passing ``d`` as a fourth operand.
    product = partial * np_d

    expected = np.einsum("ijk,kl,jl,i->i", np_a, np_b, np_c, np_d)
    full = da.einsum("ijk,kl,jl,i->i", da_a, da_b, da_c, da_d)
    assert_eq(expected, full)
    assert_eq(expected, product)
2661
+
2662
+
2663
def test_einsum_broadcasting_contraction2():
    """Same check as the one-axis case, with two length-1 axes (``a`` and ``b``)."""
    gen = np.random.default_rng()
    np_a = gen.random((1, 1, 5, 4))
    np_b = gen.random((4, 6))
    np_c = gen.random((5, 6))
    np_d = gen.random((7, 7))

    da_a = da.from_array(np_a, chunks=(1, 1, (2, 3), (2, 2)))
    da_b = da.from_array(np_b, chunks=((2, 2), (4, 2)))
    da_c = da.from_array(np_c, chunks=((2, 3), (4, 2)))
    da_d = da.from_array(np_d, chunks=((7, 3)))

    # Three-operand contraction with an implicit output.
    expected = np.einsum("abjk,kl,jl", np_a, np_b, np_c)
    partial = da.einsum("abjk,kl,jl", da_a, da_b, da_c)
    assert_eq(expected, partial)

    # Multiplying the partial result by ``d`` is compared below against
    # passing ``d`` as a fourth operand.
    product = partial * np_d

    expected = np.einsum("abjk,kl,jl,ab->ab", np_a, np_b, np_c, np_d)
    full = da.einsum("abjk,kl,jl,ab->ab", da_a, da_b, da_c, da_d)
    assert_eq(expected, full)
    assert_eq(expected, product)
2685
+
2686
+
2687
def test_einsum_broadcasting_contraction3():
    """Four-operand contraction where the length-1 axes sit on different operands."""
    gen = np.random.default_rng()
    np_a = gen.random((1, 5, 4))
    np_b = gen.random((4, 1, 6))
    np_c = gen.random((5, 6))
    np_d = gen.random((7, 7))

    da_a = da.from_array(np_a, chunks=(1, (2, 3), (2, 2)))
    da_b = da.from_array(np_b, chunks=((2, 2), 1, (4, 2)))
    da_c = da.from_array(np_c, chunks=((2, 3), (4, 2)))
    da_d = da.from_array(np_d, chunks=((7, 3)))

    signature = "ajk,kbl,jl,ab->ab"
    expected = np.einsum(signature, np_a, np_b, np_c, np_d)
    actual = da.einsum(signature, da_a, da_b, da_c, da_d)
    assert_eq(expected, actual)
2702
+
2703
+
2704
def test_einsum_empty_dimension():
    """``da.einsum`` works on operands sliced down to a zero-length first axis."""
    signature = "ca,ca->c"
    np_full = np.random.random((10, 10))
    da_empty = da.from_array(np_full, chunks=(5, 5))[:0]

    actual = da.einsum(signature, da_empty, da_empty)
    expected = np.einsum(signature, np_full[:0], np_full[:0])
    assert_eq(actual, expected)
2710
+
2711
+
2712
@pytest.mark.parametrize("a", [np.arange(11), np.arange(6).reshape((3, 2))])
@pytest.mark.parametrize("returned", [True, False])
def test_average(a, returned):
    """``da.average`` matches ``np.average`` with and without ``returned``."""
    chunked = da.from_array(a, chunks=2)

    expected = np.average(a, returned=returned)
    actual = da.average(chunked, returned=returned)

    assert_eq(expected, actual)
2721
+
2722
+
2723
@pytest.mark.parametrize("a", [np.arange(11), np.arange(6).reshape((3, 2))])
def test_average_keepdims(a):
    """``da.average(..., keepdims=True)`` matches the NumPy result."""
    chunked = da.from_array(a, chunks=2)

    actual = da.average(chunked, keepdims=True)
    expected = np.average(a, keepdims=True)

    assert_eq(expected, actual)
2731
+
2732
+
2733
@pytest.mark.parametrize("keepdims", [False, True])
def test_average_weights(keepdims):
    """Weighted ``da.average`` along axis 1 matches ``np.average``."""
    values = np.arange(6).reshape((3, 2))
    weights = np.array([0.25, 0.75])

    da_values = da.from_array(values, chunks=2)
    da_weights = da.from_array(weights, chunks=2)

    actual = da.average(da_values, weights=da_weights, axis=1, keepdims=keepdims)
    expected = np.average(values, weights=weights, axis=1, keepdims=keepdims)

    assert_eq(actual, expected)
2744
+
2745
+
2746
def test_average_raises():
    """Error and warning paths of ``da.average``."""
    values = da.arange(11, chunks=2)

    # Three weights for an 11-element array, with no axis given.
    with pytest.raises(TypeError):
        da.average(values, weights=[1, 2, 3])

    # All-zero weights: the RuntimeWarning is expected once the result is computed.
    with pytest.warns(RuntimeWarning):
        zero_weighted = da.average(values, weights=da.zeros_like(values))
        zero_weighted.compute()
2754
+
2755
+
2756
def test_iscomplexobj():
    """``np.iscomplexobj`` returns the right boolean for dask arrays."""
    cases = (
        ([1, 2], False),
        ([1, 2 + 0j], True),
    )
    for values, expected in cases:
        arr = da.from_array(np.array(values), 2)
        assert np.iscomplexobj(arr) is expected
2762
+
2763
+
2764
def test_tril_triu():
    """``da.triu``/``da.tril`` match NumPy for two chunk sizes and many offsets."""
    matrix = np.random.default_rng().standard_normal((20, 20))
    # Diagonal offsets, including values beyond the 20x20 matrix on both sides.
    offsets = (
        -25, -20, -19, -15, -14, -9, -8, -6, -5, -1,
        1, 4, 5, 6, 8, 10, 11, 15, 16, 19, 20, 21,
    )  # fmt: skip

    for chunk in (5, 4):
        lazy = da.from_array(matrix, (chunk, chunk))

        # Default offset.
        assert np.allclose(da.triu(lazy).compute(), np.triu(matrix))
        assert np.allclose(da.tril(lazy).compute(), np.tril(matrix))

        for offset in offsets:
            assert np.allclose(da.triu(lazy, offset).compute(), np.triu(matrix, offset))
            assert np.allclose(da.tril(lazy, offset).compute(), np.tril(matrix, offset))
2798
+
2799
+
2800
def test_tril_ndims():
    """``da.triu`` and ``da.tril`` match NumPy on a 3-D input."""
    A = np.random.default_rng().integers(0, 11, (10, 10, 10))
    dA = da.from_array(A, chunks=(5, 5, 5))
    assert_eq(da.triu(dA), np.triu(A))
    # The test is named after tril but previously asserted only triu.
    assert_eq(da.tril(dA), np.tril(A))
2804
+
2805
+
2806
def test_tril_triu_non_square_arrays():
    """``da.triu``/``da.tril`` match NumPy on a 30x35 integer matrix."""
    matrix = np.random.default_rng().integers(0, 11, (30, 35))
    lazy = da.from_array(matrix, chunks=(5, 5))

    upper, lower = da.triu(lazy), da.tril(lazy)
    assert_eq(upper, np.triu(matrix))
    assert_eq(lower, np.tril(matrix))
2811
+
2812
+
2813
@pytest.mark.parametrize(
    "n, k, m, chunks",
    [(3, 0, 3, "auto"), (3, 1, 3, "auto"), (3, -1, 3, "auto"), (5, 0, 5, 1)],
)
def test_tril_triu_indices(n, k, m, chunks):
    """Compare the first index array of ``da.tril_indices``/``da.triu_indices`` with NumPy."""
    index_funcs = (
        (da.tril_indices, np.tril_indices),
        (da.triu_indices, np.triu_indices),
    )
    for da_func, np_func in index_funcs:
        actual = da_func(n=n, k=k, m=m, chunks=chunks)[0]
        expected = np_func(n=n, k=k, m=m)[0]

        # On Windows the dask result is cast to NumPy's dtype before comparing.
        if sys.platform == "win32":
            actual = actual.astype(expected.dtype)
        assert_eq(actual, expected)
2839
+
2840
+
2841
def test_pickle_vectorized_routines():
    """Test that graphs that internally use np.vectorize can be pickled"""
    strings = da.from_array(["foo", "bar", ""])

    # count_nonzero: check the result directly, then again after a pickle round trip.
    nonzero_count = da.count_nonzero(strings)
    assert_eq(nonzero_count, 2, check_dtype=False)
    restored_count = pickle.loads(pickle.dumps(nonzero_count))
    assert_eq(restored_count, 2, check_dtype=False)

    # argwhere: same two-step check.
    expected_positions = [[0], [1]]
    positions = da.argwhere(strings)
    assert_eq(positions, expected_positions, check_dtype=False)
    restored_positions = pickle.loads(pickle.dumps(positions))
    assert_eq(restored_positions, expected_positions, check_dtype=False)