dask-array 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dask_array/__init__.py +228 -0
- dask_array/_backends.py +76 -0
- dask_array/_backends_array.py +99 -0
- dask_array/_blockwise.py +1410 -0
- dask_array/_broadcast.py +272 -0
- dask_array/_chunk.py +445 -0
- dask_array/_chunk_types.py +54 -0
- dask_array/_collection.py +1644 -0
- dask_array/_concatenate.py +331 -0
- dask_array/_core_utils.py +1365 -0
- dask_array/_dispatch.py +141 -0
- dask_array/_einsum.py +277 -0
- dask_array/_expr.py +544 -0
- dask_array/_expr_flow.py +586 -0
- dask_array/_gufunc.py +805 -0
- dask_array/_histogram.py +617 -0
- dask_array/_map_blocks.py +652 -0
- dask_array/_new_collection.py +10 -0
- dask_array/_numpy_compat.py +135 -0
- dask_array/_overlap.py +1159 -0
- dask_array/_rechunk.py +1050 -0
- dask_array/_reshape.py +710 -0
- dask_array/_routines.py +102 -0
- dask_array/_shuffle.py +448 -0
- dask_array/_stack.py +264 -0
- dask_array/_svg.py +291 -0
- dask_array/_templates.py +29 -0
- dask_array/_test_utils.py +257 -0
- dask_array/_ufunc.py +385 -0
- dask_array/_utils.py +349 -0
- dask_array/_visualize.py +223 -0
- dask_array/_xarray.py +337 -0
- dask_array/core/__init__.py +34 -0
- dask_array/core/_blockwise_funcs.py +312 -0
- dask_array/core/_conversion.py +422 -0
- dask_array/core/_from_graph.py +97 -0
- dask_array/creation/__init__.py +71 -0
- dask_array/creation/_arange.py +121 -0
- dask_array/creation/_diag.py +116 -0
- dask_array/creation/_diagonal.py +241 -0
- dask_array/creation/_eye.py +103 -0
- dask_array/creation/_linspace.py +102 -0
- dask_array/creation/_mesh.py +134 -0
- dask_array/creation/_ones_zeros.py +454 -0
- dask_array/creation/_pad.py +270 -0
- dask_array/creation/_repeat.py +55 -0
- dask_array/creation/_tile.py +36 -0
- dask_array/creation/_tri.py +28 -0
- dask_array/creation/_utils.py +296 -0
- dask_array/fft.py +320 -0
- dask_array/io/__init__.py +39 -0
- dask_array/io/_base.py +10 -0
- dask_array/io/_from_array.py +257 -0
- dask_array/io/_from_delayed.py +95 -0
- dask_array/io/_from_graph.py +54 -0
- dask_array/io/_from_npy_stack.py +67 -0
- dask_array/io/_store.py +336 -0
- dask_array/io/_tiledb.py +159 -0
- dask_array/io/_to_npy_stack.py +65 -0
- dask_array/io/_zarr.py +449 -0
- dask_array/linalg/__init__.py +39 -0
- dask_array/linalg/_cholesky.py +234 -0
- dask_array/linalg/_lu.py +300 -0
- dask_array/linalg/_norm.py +94 -0
- dask_array/linalg/_qr.py +601 -0
- dask_array/linalg/_solve.py +349 -0
- dask_array/linalg/_svd.py +394 -0
- dask_array/linalg/_tensordot.py +334 -0
- dask_array/linalg/_utils.py +74 -0
- dask_array/manipulation/__init__.py +45 -0
- dask_array/manipulation/_expand.py +321 -0
- dask_array/manipulation/_flip.py +92 -0
- dask_array/manipulation/_roll.py +78 -0
- dask_array/manipulation/_transpose.py +309 -0
- dask_array/random/__init__.py +125 -0
- dask_array/random/_choice.py +181 -0
- dask_array/random/_expr.py +256 -0
- dask_array/random/_generator.py +441 -0
- dask_array/random/_random_state.py +259 -0
- dask_array/random/_utils.py +84 -0
- dask_array/reductions/__init__.py +84 -0
- dask_array/reductions/_arg_reduction.py +130 -0
- dask_array/reductions/_common.py +1082 -0
- dask_array/reductions/_cumulative.py +522 -0
- dask_array/reductions/_percentile.py +261 -0
- dask_array/reductions/_reduction.py +725 -0
- dask_array/reductions/_trace.py +56 -0
- dask_array/routines/__init__.py +133 -0
- dask_array/routines/_apply.py +84 -0
- dask_array/routines/_bincount.py +112 -0
- dask_array/routines/_broadcast.py +111 -0
- dask_array/routines/_coarsen.py +115 -0
- dask_array/routines/_diff.py +79 -0
- dask_array/routines/_gradient.py +158 -0
- dask_array/routines/_indexing.py +65 -0
- dask_array/routines/_insert_delete.py +132 -0
- dask_array/routines/_misc.py +122 -0
- dask_array/routines/_nonzero.py +72 -0
- dask_array/routines/_search.py +123 -0
- dask_array/routines/_select.py +113 -0
- dask_array/routines/_statistics.py +171 -0
- dask_array/routines/_topk.py +82 -0
- dask_array/routines/_triangular.py +74 -0
- dask_array/routines/_unique.py +232 -0
- dask_array/routines/_where.py +62 -0
- dask_array/slicing/__init__.py +67 -0
- dask_array/slicing/_basic.py +550 -0
- dask_array/slicing/_blocks.py +138 -0
- dask_array/slicing/_bool_index.py +145 -0
- dask_array/slicing/_setitem.py +329 -0
- dask_array/slicing/_squeeze.py +101 -0
- dask_array/slicing/_utils.py +1133 -0
- dask_array/slicing/_vindex.py +282 -0
- dask_array/stacking/__init__.py +15 -0
- dask_array/stacking/_block.py +83 -0
- dask_array/stacking/_simple.py +58 -0
- dask_array/templates/array.html.j2 +48 -0
- dask_array/tests/__init__.py +0 -0
- dask_array/tests/conftest.py +22 -0
- dask_array/tests/test_api.py +40 -0
- dask_array/tests/test_binary_op_chunks.py +107 -0
- dask_array/tests/test_coarse_slice_through_blockwise.py +362 -0
- dask_array/tests/test_collection.py +799 -0
- dask_array/tests/test_creation.py +1102 -0
- dask_array/tests/test_expr_flow.py +143 -0
- dask_array/tests/test_linalg.py +1130 -0
- dask_array/tests/test_map_blocks_multi_output.py +104 -0
- dask_array/tests/test_rechunk_pushdown.py +214 -0
- dask_array/tests/test_reductions.py +1091 -0
- dask_array/tests/test_routines.py +2853 -0
- dask_array/tests/test_shuffle_chunks.py +67 -0
- dask_array/tests/test_slice_pushdown.py +968 -0
- dask_array/tests/test_slice_through_blockwise.py +678 -0
- dask_array/tests/test_slice_through_overlap.py +366 -0
- dask_array/tests/test_slice_through_reshape.py +272 -0
- dask_array/tests/test_slicing.py +839 -0
- dask_array/tests/test_transpose_slice_pushdown.py +208 -0
- dask_array/tests/test_visualize.py +94 -0
- dask_array/tests/test_xarray.py +193 -0
- dask_array-0.1.0.dist-info/METADATA +48 -0
- dask_array-0.1.0.dist-info/RECORD +144 -0
- dask_array-0.1.0.dist-info/WHEEL +4 -0
- dask_array-0.1.0.dist-info/entry_points.txt +2 -0
- dask_array-0.1.0.dist-info/licenses/LICENSE +29 -0
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def trace(a, offset=0, axis1=0, axis2=1, dtype=None):
    """
    Sum the entries lying on a diagonal of an array.

    Dask counterpart of :func:`numpy.trace`; small differences from the
    NumPy behaviour may exist.

    For a 2-D `a` the result is the sum of ``a[i, i + offset]`` over all
    ``i``.  For inputs with more than two dimensions, `axis1` and `axis2`
    pick out the 2-D sub-arrays whose diagonals are summed, and the result
    has the shape of `a` with those two axes removed.

    Parameters
    ----------
    a : array_like
        Array the diagonals are taken from.
    offset : int, optional
        Position of the diagonal relative to the main diagonal; may be
        positive or negative.  Defaults to 0.
    axis1, axis2 : int, optional
        The two axes spanning the 2-D sub-arrays whose diagonals are
        taken.  Default to the first two axes of `a`.
    dtype : dtype, optional
        Data type of the result and of the accumulator used for the sum;
        forwarded unchanged to the ``sum`` call.  With ``None`` and an
        integer `a` narrower than the default integer, the default integer
        precision is used; otherwise the precision of `a` is kept.

    Returns
    -------
    sum_along_diagonals : ndarray
        A single sum for 2-D `a`, otherwise an array of per-sub-array sums.

    See Also
    --------
    diag, diagonal, diagflat

    Examples
    --------
    >>> import dask_array as da
    >>> da.trace(da.eye(3)).compute()  # doctest: +SKIP
    3.0
    """
    from dask_array.creation import diagonal

    # Extract the requested diagonal(s), then reduce over the last axis.
    diagonals = diagonal(a, offset=offset, axis1=axis1, axis2=axis2)
    return diagonals.sum(-1, dtype=dtype)
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
"""Array routines for array-expr."""
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
|
|
5
|
+
from dask.utils import derived_from
|
|
6
|
+
|
|
7
|
+
# Direct imports from submodules
|
|
8
|
+
# Re-exports from other modules
|
|
9
|
+
from dask_array._blockwise import outer # noqa: F401
|
|
10
|
+
from dask_array._collection import asanyarray, asarray
|
|
11
|
+
from dask_array._ufunc import ( # noqa: F401
|
|
12
|
+
allclose,
|
|
13
|
+
around,
|
|
14
|
+
isclose,
|
|
15
|
+
isnull,
|
|
16
|
+
notnull,
|
|
17
|
+
round,
|
|
18
|
+
)
|
|
19
|
+
from dask_array.routines._apply import apply_along_axis, apply_over_axes
|
|
20
|
+
from dask_array.routines._bincount import bincount
|
|
21
|
+
from dask_array.routines._broadcast import broadcast_arrays, unify_chunks
|
|
22
|
+
from dask_array.routines._coarsen import aligned_coarsen_chunks, coarsen
|
|
23
|
+
from dask_array.routines._diff import diff
|
|
24
|
+
from dask_array.routines._gradient import gradient
|
|
25
|
+
from dask_array.routines._indexing import ravel_multi_index, unravel_index
|
|
26
|
+
from dask_array.routines._insert_delete import (
|
|
27
|
+
append,
|
|
28
|
+
delete,
|
|
29
|
+
ediff1d,
|
|
30
|
+
insert,
|
|
31
|
+
)
|
|
32
|
+
from dask_array.routines._misc import (
|
|
33
|
+
compress,
|
|
34
|
+
ndim,
|
|
35
|
+
result_type,
|
|
36
|
+
shape,
|
|
37
|
+
take,
|
|
38
|
+
)
|
|
39
|
+
from dask_array.routines._nonzero import (
|
|
40
|
+
argwhere,
|
|
41
|
+
count_nonzero,
|
|
42
|
+
flatnonzero,
|
|
43
|
+
isnonzero,
|
|
44
|
+
nonzero,
|
|
45
|
+
)
|
|
46
|
+
from dask_array.routines._search import isin, searchsorted
|
|
47
|
+
from dask_array.routines._select import (
|
|
48
|
+
choose,
|
|
49
|
+
digitize,
|
|
50
|
+
extract,
|
|
51
|
+
piecewise,
|
|
52
|
+
select,
|
|
53
|
+
)
|
|
54
|
+
from dask_array.routines._statistics import average, corrcoef, cov
|
|
55
|
+
from dask_array.routines._topk import argtopk, topk
|
|
56
|
+
from dask_array.routines._triangular import (
|
|
57
|
+
tril,
|
|
58
|
+
tril_indices,
|
|
59
|
+
tril_indices_from,
|
|
60
|
+
triu,
|
|
61
|
+
triu_indices,
|
|
62
|
+
triu_indices_from,
|
|
63
|
+
)
|
|
64
|
+
from dask_array.routines._unique import union1d, unique
|
|
65
|
+
from dask_array.routines._where import where
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
@derived_from(np)
def array(x, dtype=None, ndmin=None, *, like=None):
    # Coerce the input first; ``like`` is forwarded untouched to ``asarray``.
    x = asarray(x, like=like)
    # Prepend length-1 axes until the requested minimum rank is reached.
    # ``...`` is used instead of ``:`` so the expression stays valid for 0-d
    # inputs, where an explicit slice would be one index too many.
    while ndmin is not None and x.ndim < ndmin:
        x = x[None, ...]
    # Cast only when a dtype was requested and it differs from the current one.
    if dtype is not None and x.dtype != dtype:
        x = x.astype(dtype)
    return x
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
__all__ = [
|
|
79
|
+
"aligned_coarsen_chunks",
|
|
80
|
+
"allclose",
|
|
81
|
+
"append",
|
|
82
|
+
"apply_along_axis",
|
|
83
|
+
"array",
|
|
84
|
+
"apply_over_axes",
|
|
85
|
+
"argwhere",
|
|
86
|
+
"argtopk",
|
|
87
|
+
"around",
|
|
88
|
+
"average",
|
|
89
|
+
"bincount",
|
|
90
|
+
"broadcast_arrays",
|
|
91
|
+
"choose",
|
|
92
|
+
"coarsen",
|
|
93
|
+
"compress",
|
|
94
|
+
"corrcoef",
|
|
95
|
+
"count_nonzero",
|
|
96
|
+
"cov",
|
|
97
|
+
"delete",
|
|
98
|
+
"diff",
|
|
99
|
+
"digitize",
|
|
100
|
+
"ediff1d",
|
|
101
|
+
"extract",
|
|
102
|
+
"flatnonzero",
|
|
103
|
+
"gradient",
|
|
104
|
+
"insert",
|
|
105
|
+
"isclose",
|
|
106
|
+
"isin",
|
|
107
|
+
"isnonzero",
|
|
108
|
+
"isnull",
|
|
109
|
+
"ndim",
|
|
110
|
+
"nonzero",
|
|
111
|
+
"notnull",
|
|
112
|
+
"outer",
|
|
113
|
+
"piecewise",
|
|
114
|
+
"ravel_multi_index",
|
|
115
|
+
"result_type",
|
|
116
|
+
"round",
|
|
117
|
+
"searchsorted",
|
|
118
|
+
"select",
|
|
119
|
+
"shape",
|
|
120
|
+
"take",
|
|
121
|
+
"topk",
|
|
122
|
+
"tril",
|
|
123
|
+
"tril_indices",
|
|
124
|
+
"tril_indices_from",
|
|
125
|
+
"triu",
|
|
126
|
+
"triu_indices",
|
|
127
|
+
"triu_indices_from",
|
|
128
|
+
"unify_chunks",
|
|
129
|
+
"union1d",
|
|
130
|
+
"unique",
|
|
131
|
+
"unravel_index",
|
|
132
|
+
"where",
|
|
133
|
+
]
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
"""Apply functions for array-expr."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
|
|
7
|
+
from dask_array._collection import asarray
|
|
8
|
+
from dask.utils import derived_from, funcname
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _inner_apply_along_axis(arr, func1d, func1d_axis, func1d_args, func1d_kwargs):
|
|
12
|
+
return np.apply_along_axis(func1d, func1d_axis, arr, *func1d_args, **func1d_kwargs)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@derived_from(np)
def apply_along_axis(func1d, axis, arr, *args, dtype=None, shape=None, **kwargs):
    """
    This is a blocked variant of :func:`numpy.apply_along_axis` implemented via
    :func:`dask.array.map_blocks`

    Notes
    -----
    If either of `dtype` or `shape` are not provided, Dask attempts to
    determine them by calling `func1d` on a dummy array. This may produce
    incorrect values for `dtype` or `shape`, so we recommend providing them.

    An `axis` outside ``[-arr.ndim, arr.ndim)`` raises NumPy's ``AxisError``.
    """
    arr = asarray(arr)

    # Verify that axis is valid and throw an error otherwise.  Slicing the
    # shape alone would silently clamp an out-of-range axis, so check first.
    ndim = arr.ndim
    if not -ndim <= axis < ndim:
        # ``AxisError`` lives in ``numpy.exceptions`` on recent NumPy and in
        # the top-level namespace on older releases.
        axis_error = getattr(np, "exceptions", np).AxisError
        raise axis_error(axis, ndim)
    # Slicing the shape maps a negative axis to its non-negative position.
    axis = len(arr.shape[:axis])

    # If necessary, infer dtype and shape of the output of func1d by calling it on test data.
    if shape is None or dtype is None:
        test_data = np.ones((1,), dtype=arr.dtype)
        test_result = np.array(func1d(test_data, *args, **kwargs))
        if shape is None:
            shape = test_result.shape
        if dtype is None:
            dtype = test_result.dtype

    # Rechunk so that func1d is applied over the full axis.
    arr = arr.rechunk(arr.chunks[:axis] + (arr.shape[axis : axis + 1],) + arr.chunks[axis + 1 :])

    # Map func1d over the data to get the result
    # Adds other axes as needed.
    result = arr.map_blocks(
        _inner_apply_along_axis,
        name=funcname(func1d) + "-along-axis",
        dtype=dtype,
        chunks=(arr.chunks[:axis] + shape + arr.chunks[axis + 1 :]),
        drop_axis=axis,
        new_axis=list(range(axis, axis + len(shape), 1)),
        func1d=func1d,
        func1d_axis=axis,
        func1d_args=args,
        func1d_kwargs=kwargs,
    )

    return result
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@derived_from(np)
def apply_over_axes(func, a, axes):
    """Apply ``func`` along each of the given axes in turn."""
    a = asarray(a)
    # Accept either an iterable of axes or a single axis.
    try:
        axes = tuple(axes)
    except TypeError:
        axes = (axes,)

    full_slices = (slice(None),) * a.ndim
    expected_ndim = a.ndim

    # Each step is delegated to `apply_along_axis`, calling ``func(block, 0)``.
    out = a
    for ax in axes:
        out = apply_along_axis(func, ax, out, 0)

        if out.ndim == expected_ndim:
            # Dimensionality preserved; nothing to restore.
            continue
        if out.ndim != expected_ndim - 1:
            raise ValueError("func must either preserve dimensionality of the input or reduce it by one.")
        # One axis was dropped: put a length-1 axis back at its position.
        out = out[full_slices[:ax] + (None,)]

    return out
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
"""Bincount implementation for array-expr."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from functools import cached_property, partial
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
|
|
9
|
+
from dask._task_spec import Task, TaskRef
|
|
10
|
+
from dask_array._collection import asarray, new_collection
|
|
11
|
+
from dask_array._expr import ArrayExpr
|
|
12
|
+
from dask.utils import derived_from
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _bincount_chunk(x, weights, minlength):
|
|
16
|
+
"""Apply bincount to a single chunk, wrapping result in extra dimension."""
|
|
17
|
+
if weights is not None:
|
|
18
|
+
result = np.bincount(x, weights=weights, minlength=minlength)
|
|
19
|
+
else:
|
|
20
|
+
result = np.bincount(x, minlength=minlength)
|
|
21
|
+
return result[np.newaxis]
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _bincount_sum(bincounts, axis, keepdims, dtype=None):
|
|
25
|
+
"""Sum bincount results, handling variable lengths when minlength=0."""
|
|
26
|
+
if not isinstance(bincounts, list):
|
|
27
|
+
bincounts = [bincounts]
|
|
28
|
+
|
|
29
|
+
n = max(b.shape[1] for b in bincounts)
|
|
30
|
+
out = np.zeros((1, n), dtype=bincounts[0].dtype)
|
|
31
|
+
for b in bincounts:
|
|
32
|
+
out[0, : b.shape[1]] += b[0]
|
|
33
|
+
|
|
34
|
+
if not keepdims:
|
|
35
|
+
return out[0]
|
|
36
|
+
return out
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class BincountChunked(ArrayExpr):
    """Array expression holding one bincount row per chunk of ``x``."""

    _parameters = ["x", "weights", "minlength", "output_size", "meta_provided"]
    _defaults = {"weights": None}

    @cached_property
    def _meta(self):
        # Empty 2-D placeholder carrying the dtype of the supplied meta.
        dtype = self.meta_provided.dtype
        return np.empty((0, 0), dtype=dtype)

    @cached_property
    def chunks(self):
        # Axis 0: one length-1 block per chunk of ``x``.
        # Axis 1: a single block of width ``output_size``.
        rows = (1,) * len(self.x.chunks[0])
        cols = (self.output_size,)
        return (rows, cols)

    @cached_property
    def _name(self):
        return f"bincount-{self.deterministic_token}"

    def _layer(self):
        # Build one ``_bincount_chunk`` task per chunk of ``x``.
        name = self._name
        x_name = self.x._name
        minlength = self.minlength
        graph = {}
        for i in range(len(self.x.chunks[0])):
            key = (name, i, 0)
            if self.weights is None:
                weights_ref = None
            else:
                weights_ref = TaskRef((self.weights._name, i))
            graph[key] = Task(key, _bincount_chunk, TaskRef((x_name, i)), weights_ref, minlength)
        return graph

    @property
    def _dependencies(self):
        # ``weights`` is a dependency only when it was supplied.
        if self.weights is None:
            return [self.x]
        return [self.x, self.weights]
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
@derived_from(np)
def bincount(x, weights=None, minlength=0, split_every=None):
    """Count how often each non-negative integer value occurs in a 1-D array."""
    from dask_array.reductions import _tree_reduce

    x = asarray(x)
    if x.ndim != 1:
        raise ValueError("Input array must be one dimensional. Try using x.ravel()")

    # A tiny NumPy bincount gives the result dtype for each case.
    if weights is None:
        meta = np.bincount([])
    else:
        weights = asarray(weights)
        if weights.chunks != x.chunks:
            raise ValueError("Chunks of input array x and weights must match.")
        meta = np.bincount([1], weights=np.array([1], dtype=weights.dtype))

    # Without a minimum length the width of each per-chunk result is unknown.
    output_size = np.nan if minlength == 0 else minlength

    per_chunk = new_collection(BincountChunked(x, weights, minlength, output_size, meta_provided=meta))

    if minlength > 0:
        return per_chunk.sum(axis=0)

    # Otherwise combine the rows with the variable-width aggregator.
    return _tree_reduce(
        per_chunk,
        aggregate=partial(_bincount_sum, dtype=meta.dtype),
        axis=(0,),
        keepdims=False,
        dtype=meta.dtype,
        split_every=split_every,
        concatenate=False,
    )
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
"""Broadcasting utilities for array-expr."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
|
|
7
|
+
from dask_array._collection import asanyarray, asarray
|
|
8
|
+
from dask.utils import derived_from
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def unify_chunks(*args, **kwargs):
    """
    Bring a sequence of arrays onto a common chunking

    Other common operations such as :func:`dask.array.core.map_blocks` and
    :func:`dask.array.core.blockwise` rely on this helper; end-users rarely
    need to call it directly.

    Parameters
    ----------
    *args: sequence of Array, index pairs
        Sequence like (x, 'ij', y, 'jk', z, 'i')
    warn : bool, optional
        Keyword-only; forwarded to ``unify_chunks_expr``. Defaults to True.
        Any other keyword argument raises ``TypeError``.

    Examples
    --------
    >>> import dask_array as da
    >>> x = da.ones(10, chunks=((5, 2, 3),))
    >>> y = da.ones(10, chunks=((2, 3, 5),))
    >>> chunkss, arrays = unify_chunks(x, 'i', y, 'i')
    >>> chunkss
    {'i': (2, 3, 2, 3)}

    Returns
    -------
    chunkss : dict
        Map like {index: chunks}.
    arrays : list
        List of rechunked arrays.
    """
    from toolz import partition

    from dask_array._new_collection import new_collection
    from dask_array._expr import unify_chunks_expr

    if not args:
        return {}, []

    # Pair every argument with its index.  Only entries that carry an index
    # are coerced to arrays; the rest pass through untouched.
    pairs = []
    for arr, ind in partition(2, args):
        if ind is not None:
            arr = asanyarray(arr)
        pairs.append((arr, ind))

    arrays, inds = zip(*pairs)
    if all(ind is None for ind in inds):
        return {}, list(arrays)

    # Convert to expression-level args
    expr_args = []
    for arr, ind in pairs:
        expr_args.append(arr if ind is None else arr.expr)
        expr_args.append(ind)

    warn = kwargs.pop("warn", True)
    if kwargs:
        raise TypeError(f"Unexpected keyword arguments: {kwargs}")
    chunkss, unified_exprs, _ = unify_chunks_expr(*expr_args, warn=warn)

    # Convert back to collections; un-indexed entries are returned as given.
    unified = [
        original if ind is None else new_collection(expr)
        for expr, (original, ind) in zip(unified_exprs, pairs)
    ]

    return chunkss, unified
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@derived_from(np)
def broadcast_arrays(*args, subok=False):
    """Broadcast all given arrays to one shared shape and chunking."""
    from toolz import concat

    from dask_array._collection import broadcast_to
    from dask_array._core_utils import broadcast_chunks, broadcast_shapes
    from dask_array._numpy_compat import NUMPY_GE_200

    # The result is a tuple when NUMPY_GE_200 is set and a list otherwise.
    container = tuple if NUMPY_GE_200 else list

    coerce = asanyarray if bool(subok) else asarray
    args = tuple(coerce(arg) for arg in args)

    if not args:
        return container()

    # Unify uneven chunking.  Dimensions are labelled from the trailing axis
    # so that axes which broadcast together share an index.
    inds = [list(range(arr.ndim - 1, -1, -1)) for arr in args]
    _, args = unify_chunks(*concat(zip(args, inds)), warn=False)

    shape = broadcast_shapes(*(arr.shape for arr in args))
    chunks = broadcast_chunks(*(arr.chunks for arr in args))

    return container(broadcast_to(arr, shape=shape, chunks=chunks) for arr in args)
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
"""Coarsen implementation for array-expr."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import itertools
|
|
6
|
+
from functools import cached_property, partial
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
|
|
10
|
+
from dask._task_spec import Task, TaskRef
|
|
11
|
+
from dask_array._collection import asarray, new_collection
|
|
12
|
+
from dask_array._expr import ArrayExpr
|
|
13
|
+
from dask_array._utils import meta_from_array
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _partition(n, size):
|
|
17
|
+
"""Partition n into evenly distributed sizes."""
|
|
18
|
+
quotient, remainder = divmod(n, size)
|
|
19
|
+
return [size] * quotient, [remainder] if remainder else []
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def aligned_coarsen_chunks(chunks, multiple):
    """Compute a chunking whose block sizes line up with ``multiple``.

    Every chunk is trimmed down to a multiple of ``multiple``.  The
    trimmed-off elements are pooled and handed back in ``multiple``-sized
    portions: first to the chunks that were trimmed, then to those already
    aligned, smallest first within each group.  A leftover smaller than
    ``multiple`` becomes a final chunk, and zero-length chunks are dropped.
    """
    sizes = np.asarray(chunks)
    leftover = sizes % multiple
    trimmed = sizes - leftover

    # Chunks that were already aligned are unchanged by the trimming.
    untouched = trimmed == sizes
    aligned_idx = np.where(untouched)[0]
    misaligned_idx = np.where(~untouched)[0]
    order = [
        *misaligned_idx[np.argsort(trimmed[misaligned_idx])],
        *aligned_idx[np.argsort(trimmed[aligned_idx])],
    ]

    portions, tail = _partition(leftover.sum(), multiple)
    for position, portion in enumerate(portions):
        trimmed[order[position]] += portion

    result = np.array([*trimmed, *tail])
    return tuple(result[result > 0])
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class Coarsen(ArrayExpr):
    """Expression class for coarsen operation.

    Parameters (see ``_parameters``): the input expression ``x``, the
    ``reduction`` callable, ``axes`` (a mapping from axis number to
    coarsening factor), ``trim_excess`` and optional ``kwargs`` forwarded to
    the reduction.
    """

    _parameters = ["x", "reduction", "axes", "trim_excess", "kwargs"]
    _defaults = {"trim_excess": False, "kwargs": None}

    @cached_property
    def _reduction(self):
        reduction = self.reduction
        # Handle dask.array or dask_array functions - use numpy equivalent.
        # Some callables have no (or a ``None``) ``__module__``; treat those
        # as not coming from dask instead of failing on attribute access.
        module = getattr(reduction, "__module__", None) or ""
        if module.startswith(("dask.", "dask_array")):
            return getattr(np, reduction.__name__)
        return reduction

    @cached_property
    def _kwargs(self):
        # Normalise a missing kwargs mapping to an empty dict.
        return self.kwargs or {}

    @cached_property
    def _meta(self):
        x = self.x
        # Run the reduction on a one-element array to get the output meta.
        meta = self._reduction(np.empty((1,) * x.ndim, dtype=x.dtype), **self._kwargs)
        return meta_from_array(meta, ndim=x.ndim)

    @cached_property
    def chunks(self):
        axes = self.axes
        coarsened = []
        for ax, sizes in enumerate(self.x.chunks):
            # Axes without an entry in ``axes`` keep their size (factor 1).
            factor = axes.get(ax, 1)
            scaled = (int(size // factor) for size in sizes)
            # Chunks that coarsen to zero length are dropped.
            coarsened.append(tuple(size for size in scaled if size > 0))
        return tuple(coarsened)

    def _layer(self):
        from dask_array import _chunk as chunk

        x = self.x
        name = self._name

        # The per-block callable is the same for every chunk, so build it
        # once rather than once per task.
        block_func = partial(
            chunk.coarsen,
            self._reduction,
            axes=self.axes,
            trim_excess=self.trim_excess,
            **self._kwargs,
        )

        dsk = {}
        in_ranges = [range(len(c)) for c in x.chunks]
        for in_idx in itertools.product(*in_ranges):
            in_key = (x._name,) + in_idx
            out_key = (name,) + in_idx
            dsk[out_key] = Task(out_key, block_func, TaskRef(in_key))

        return dsk
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def coarsen(reduction, x, axes, trim_excess=False, **kwargs):
    """Reduce fixed-size neighbourhoods of an array to single values.

    Parameters
    ----------
    reduction : callable
        Reduction applied to each neighbourhood.
    x : array_like
        Input; converted with ``asarray``.
    axes : dict
        Mapping from axis number to the coarsening factor along that axis.
    trim_excess : bool, optional
        When false (the default), every coarsened axis length must be
        divisible by its factor.
    **kwargs
        Stored on the expression and passed on to the reduction.

    Raises
    ------
    ValueError
        If ``trim_excess`` is false and a factor does not divide the
        corresponding axis length.
    """
    x = asarray(x)

    if not trim_excess:
        misaligned = any(x.shape[ax] % factor != 0 for ax, factor in axes.items())
        if misaligned:
            raise ValueError(f"Coarsening factors {axes} do not align with array shape {x.shape}.")

    # Rechunk only the axes whose current chunking is not already aligned.
    realigned = {}
    for ax, factor in axes.items():
        current = x.chunks[ax]
        target = aligned_coarsen_chunks(current, factor)
        if target != current:
            realigned[ax] = target
    if realigned:
        x = x.rechunk(realigned)

    # An empty kwargs dict is stored as ``None``.
    return new_collection(Coarsen(x, reduction, axes, trim_excess, kwargs or None))
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
"""Difference operation."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def diff(a, n=1, axis=-1, prepend=None, append=None):
    """Compute the n-th discrete difference along an axis.

    Parameters
    ----------
    a : array_like
        Input array.
    n : int, optional
        How many times the difference is applied. Default is 1; ``0``
        returns the input unchanged.
    axis : int, optional
        Axis along which to difference. Default is -1.
    prepend, append : array_like, optional
        Values joined to the start / end of `a` along `axis` before
        differencing.  Zero-dimensional values are broadcast to length 1
        along `axis` and to the shape of `a` along every other axis.

    Returns
    -------
    diff : Array
        The n-th differences.

    Raises
    ------
    ValueError
        If `n` is negative.

    See Also
    --------
    numpy.diff
    """
    # Lazy imports to avoid circular dependencies
    from dask_array._broadcast import broadcast_to
    from dask_array.core import asarray
    from dask_array.stacking import concatenate

    a = asarray(a)
    n = int(n)
    axis = int(axis)

    if n == 0:
        return a
    if n < 0:
        raise ValueError(f"order must be non-negative but got {n}")

    def _edge(values):
        # Coerce an edge value; expand scalars to a length-1 slab along ``axis``.
        values = asarray(values)
        if values.ndim == 0:
            slab_shape = list(a.shape)
            slab_shape[axis] = 1
            values = broadcast_to(values, tuple(slab_shape))
        return values

    pieces = []
    if prepend is not None:
        pieces.append(_edge(prepend))
    pieces.append(a)
    if append is not None:
        pieces.append(_edge(append))

    if len(pieces) > 1:
        a = concatenate(pieces, axis)

    # ``upper`` drops the first element along ``axis``, ``lower`` the last.
    upper = [slice(None)] * a.ndim
    lower = list(upper)
    upper[axis] = slice(1, None)
    lower[axis] = slice(None, -1)
    upper = tuple(upper)
    lower = tuple(lower)

    result = a
    for _ in range(n):
        result = result[upper] - result[lower]

    return result
|