PyPI - dask-array - Versions diffs - 0.1.0__py3-none-any.whl - Mend

dask-array 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (144) hide show

dask_array/__init__.py +228 -0
dask_array/_backends.py +76 -0
dask_array/_backends_array.py +99 -0
dask_array/_blockwise.py +1410 -0
dask_array/_broadcast.py +272 -0
dask_array/_chunk.py +445 -0
dask_array/_chunk_types.py +54 -0
dask_array/_collection.py +1644 -0
dask_array/_concatenate.py +331 -0
dask_array/_core_utils.py +1365 -0
dask_array/_dispatch.py +141 -0
dask_array/_einsum.py +277 -0
dask_array/_expr.py +544 -0
dask_array/_expr_flow.py +586 -0
dask_array/_gufunc.py +805 -0
dask_array/_histogram.py +617 -0
dask_array/_map_blocks.py +652 -0
dask_array/_new_collection.py +10 -0
dask_array/_numpy_compat.py +135 -0
dask_array/_overlap.py +1159 -0
dask_array/_rechunk.py +1050 -0
dask_array/_reshape.py +710 -0
dask_array/_routines.py +102 -0
dask_array/_shuffle.py +448 -0
dask_array/_stack.py +264 -0
dask_array/_svg.py +291 -0
dask_array/_templates.py +29 -0
dask_array/_test_utils.py +257 -0
dask_array/_ufunc.py +385 -0
dask_array/_utils.py +349 -0
dask_array/_visualize.py +223 -0
dask_array/_xarray.py +337 -0
dask_array/core/__init__.py +34 -0
dask_array/core/_blockwise_funcs.py +312 -0
dask_array/core/_conversion.py +422 -0
dask_array/core/_from_graph.py +97 -0
dask_array/creation/__init__.py +71 -0
dask_array/creation/_arange.py +121 -0
dask_array/creation/_diag.py +116 -0
dask_array/creation/_diagonal.py +241 -0
dask_array/creation/_eye.py +103 -0
dask_array/creation/_linspace.py +102 -0
dask_array/creation/_mesh.py +134 -0
dask_array/creation/_ones_zeros.py +454 -0
dask_array/creation/_pad.py +270 -0
dask_array/creation/_repeat.py +55 -0
dask_array/creation/_tile.py +36 -0
dask_array/creation/_tri.py +28 -0
dask_array/creation/_utils.py +296 -0
dask_array/fft.py +320 -0
dask_array/io/__init__.py +39 -0
dask_array/io/_base.py +10 -0
dask_array/io/_from_array.py +257 -0
dask_array/io/_from_delayed.py +95 -0
dask_array/io/_from_graph.py +54 -0
dask_array/io/_from_npy_stack.py +67 -0
dask_array/io/_store.py +336 -0
dask_array/io/_tiledb.py +159 -0
dask_array/io/_to_npy_stack.py +65 -0
dask_array/io/_zarr.py +449 -0
dask_array/linalg/__init__.py +39 -0
dask_array/linalg/_cholesky.py +234 -0
dask_array/linalg/_lu.py +300 -0
dask_array/linalg/_norm.py +94 -0
dask_array/linalg/_qr.py +601 -0
dask_array/linalg/_solve.py +349 -0
dask_array/linalg/_svd.py +394 -0
dask_array/linalg/_tensordot.py +334 -0
dask_array/linalg/_utils.py +74 -0
dask_array/manipulation/__init__.py +45 -0
dask_array/manipulation/_expand.py +321 -0
dask_array/manipulation/_flip.py +92 -0
dask_array/manipulation/_roll.py +78 -0
dask_array/manipulation/_transpose.py +309 -0
dask_array/random/__init__.py +125 -0
dask_array/random/_choice.py +181 -0
dask_array/random/_expr.py +256 -0
dask_array/random/_generator.py +441 -0
dask_array/random/_random_state.py +259 -0
dask_array/random/_utils.py +84 -0
dask_array/reductions/__init__.py +84 -0
dask_array/reductions/_arg_reduction.py +130 -0
dask_array/reductions/_common.py +1082 -0
dask_array/reductions/_cumulative.py +522 -0
dask_array/reductions/_percentile.py +261 -0
dask_array/reductions/_reduction.py +725 -0
dask_array/reductions/_trace.py +56 -0
dask_array/routines/__init__.py +133 -0
dask_array/routines/_apply.py +84 -0
dask_array/routines/_bincount.py +112 -0
dask_array/routines/_broadcast.py +111 -0
dask_array/routines/_coarsen.py +115 -0
dask_array/routines/_diff.py +79 -0
dask_array/routines/_gradient.py +158 -0
dask_array/routines/_indexing.py +65 -0
dask_array/routines/_insert_delete.py +132 -0
dask_array/routines/_misc.py +122 -0
dask_array/routines/_nonzero.py +72 -0
dask_array/routines/_search.py +123 -0
dask_array/routines/_select.py +113 -0
dask_array/routines/_statistics.py +171 -0
dask_array/routines/_topk.py +82 -0
dask_array/routines/_triangular.py +74 -0
dask_array/routines/_unique.py +232 -0
dask_array/routines/_where.py +62 -0
dask_array/slicing/__init__.py +67 -0
dask_array/slicing/_basic.py +550 -0
dask_array/slicing/_blocks.py +138 -0
dask_array/slicing/_bool_index.py +145 -0
dask_array/slicing/_setitem.py +329 -0
dask_array/slicing/_squeeze.py +101 -0
dask_array/slicing/_utils.py +1133 -0
dask_array/slicing/_vindex.py +282 -0
dask_array/stacking/__init__.py +15 -0
dask_array/stacking/_block.py +83 -0
dask_array/stacking/_simple.py +58 -0
dask_array/templates/array.html.j2 +48 -0
dask_array/tests/__init__.py +0 -0
dask_array/tests/conftest.py +22 -0
dask_array/tests/test_api.py +40 -0
dask_array/tests/test_binary_op_chunks.py +107 -0
dask_array/tests/test_coarse_slice_through_blockwise.py +362 -0
dask_array/tests/test_collection.py +799 -0
dask_array/tests/test_creation.py +1102 -0
dask_array/tests/test_expr_flow.py +143 -0
dask_array/tests/test_linalg.py +1130 -0
dask_array/tests/test_map_blocks_multi_output.py +104 -0
dask_array/tests/test_rechunk_pushdown.py +214 -0
dask_array/tests/test_reductions.py +1091 -0
dask_array/tests/test_routines.py +2853 -0
dask_array/tests/test_shuffle_chunks.py +67 -0
dask_array/tests/test_slice_pushdown.py +968 -0
dask_array/tests/test_slice_through_blockwise.py +678 -0
dask_array/tests/test_slice_through_overlap.py +366 -0
dask_array/tests/test_slice_through_reshape.py +272 -0
dask_array/tests/test_slicing.py +839 -0
dask_array/tests/test_transpose_slice_pushdown.py +208 -0
dask_array/tests/test_visualize.py +94 -0
dask_array/tests/test_xarray.py +193 -0
dask_array-0.1.0.dist-info/METADATA +48 -0
dask_array-0.1.0.dist-info/RECORD +144 -0
dask_array-0.1.0.dist-info/WHEEL +4 -0
dask_array-0.1.0.dist-info/entry_points.txt +2 -0
dask_array-0.1.0.dist-info/licenses/LICENSE +29 -0

dask_array/reductions/_cumulative.py ADDED Viewed

@@ -0,0 +1,522 @@
+from __future__ import annotations
+import math
+import operator
+from functools import partial
+from itertools import product
+import numpy as np
+from dask_array._new_collection import new_collection
+# Local implementations of merge/scan functions (copied from dask.array.reductions)
+def _cumsum_merge(a, b):
+    """Add two partial cumulative-sum results, keeping the mask of ``b``.
+    When neither operand is a ``np.ma.masked_array`` this is plain ``a + b``.
+    Otherwise the underlying data of both operands are added and the sum is
+    wrapped in a masked array that carries the mask of ``b``.
+    """
+    any_masked = any(isinstance(arr, np.ma.masked_array) for arr in (a, b))
+    if not any_masked:
+        return a + b
+    total = np.ma.getdata(a) + np.ma.getdata(b)
+    return np.ma.masked_array(total, mask=np.ma.getmaskarray(b))
+def _cumprod_merge(a, b):
+    """Multiply two partial cumulative-product results, keeping the mask of ``b``.
+    When neither operand is a ``np.ma.masked_array`` this is plain ``a * b``.
+    Otherwise the underlying data of both operands are multiplied and the
+    product is wrapped in a masked array that carries the mask of ``b``.
+    """
+    any_masked = any(isinstance(arr, np.ma.masked_array) for arr in (a, b))
+    if not any_masked:
+        return a * b
+    product_values = np.ma.getdata(a) * np.ma.getdata(b)
+    return np.ma.masked_array(product_values, mask=np.ma.getmaskarray(b))
+def _prefixscan_first(func, x, axis, dtype):
+    """Apply the cumulative function ``func`` to block ``x``.
+    ``axis`` and ``dtype`` are passed to ``func`` as keyword arguments; the
+    result of that call is returned unchanged.
+    """
+    scanned = func(x, axis=axis, dtype=dtype)
+    return scanned
+def _prefixscan_combine(func, binop, pre, x, axis, dtype):
+    """Scan one block and merge in the prefix accumulated before it.
+    Parameters
+    ----------
+    func : callable
+        Cumulative function (e.g. ``np.cumsum``); called as
+        ``func(x, axis=axis, dtype=dtype)``.
+    binop : callable
+        Associative function (e.g. ``add``); called as
+        ``binop(pre, scanned_block)``.
+    pre : np.array
+        The value calculated in parallel from ``preop``.
+        For example, the sum of all the previous blocks.
+    x : np.array
+        Current block
+    axis : int
+    dtype : dtype
+    Returns
+    -------
+    np.array
+    """
+    scanned = func(x, axis=axis, dtype=dtype)
+    return binop(pre, scanned)
+from dask_array._expr import ArrayExpr
+from dask_array._utils import validate_axis
+from dask.tokenize import _tokenize_deterministic
+from dask.utils import cached_property, funcname
+def _prepare_cumulative(x, axis):
+    """Normalise the input of a cumulative reduction.
+    ``x`` is converted with ``asarray`` unless it already is an ``Array``.
+    When ``axis`` is None the reduction runs over axis 0; an input with more
+    than one dimension is first flattened and rechunked with
+    ``chunks=x.npartitions``.
+    Returns the ``(array, axis)`` pair to reduce over.
+    """
+    from dask_array._collection import Array
+    if not isinstance(x, Array):
+        from dask_array.core._conversion import asarray
+        x = asarray(x)
+    # An explicit axis needs no reshaping.
+    if axis is not None:
+        return x, axis
+    if x.ndim > 1:
+        x = x.flatten().rechunk(chunks=x.npartitions)
+    return x, 0
+class CumReduction(ArrayExpr):
+    """Expression for cumulative reductions (cumsum, cumprod, etc.).
+    Uses the sequential algorithm: apply the cumulative function to each block,
+    then walk the blocks along ``axis`` in order, carrying an "extra" value
+    (the ``binop`` combination of the last slice of every earlier block) that
+    is merged into each later block with ``binop``.
+    Parameters
+    ----------
+    array
+        Input array expression.
+    func : callable
+        Cumulative function applied to each block (e.g. ``np.cumsum``).
+    binop : callable
+        Binary function used to merge the carried value into a block.
+    ident : scalar
+        Fill value of the carried value for the first block along ``axis``.
+    axis : int
+        Axis along which to accumulate.
+    _dtype : dtype, optional
+        Output dtype; inferred from ``func`` when None.
+    """
+    _parameters = ["array", "func", "binop", "ident", "axis", "_dtype"]
+    _defaults = {"_dtype": None}
+    @cached_property
+    def _name(self):
+        return f"{funcname(self.func)}-{_tokenize_deterministic(*self.operands)}"
+    @cached_property
+    def dtype(self):
+        """Output dtype: the explicit ``_dtype`` if given, else inferred."""
+        if self._dtype is not None:
+            return np.dtype(self._dtype)
+        # Infer dtype by running the function on an empty array of the input dtype
+        return getattr(self.func(np.ones((0,), dtype=self.array.dtype)), "dtype", object)
+    @cached_property
+    def _meta(self):
+        # Return the input meta, cast to the output dtype when they differ
+        meta = self.array._meta
+        if hasattr(meta, "dtype") and meta.dtype != self.dtype:
+            return meta.astype(self.dtype)
+        return meta
+    @cached_property
+    def chunks(self):
+        # A cumulative reduction keeps the chunking of its input
+        return self.array.chunks
+    def _layer(self):
+        """Build the task graph of the sequential cumulative reduction.
+        Returns a dict with three kinds of keys: ``(name + "-chunk", *idx)``
+        for the per-block cumulative results, ``(name, "extra", *idx)`` for
+        the value carried into each block, and ``(name, *idx)`` for the
+        output blocks.
+        """
+        import inspect
+        from dask.utils import apply
+        x = self.array
+        axis = self.axis
+        func = self.func
+        binop = self.binop
+        ident = self.ident
+        dtype = self.dtype
+        per_block_name = self._name + "-chunk"
+        # Only forward ``dtype`` to ``func`` when it accepts that keyword
+        try:
+            use_dtype = "dtype" in inspect.signature(func).parameters
+        except ValueError:
+            # No introspectable signature.
+            # Workaround for numpy ufunc.accumulate
+            owner = getattr(func, "__self__", None)
+            use_dtype = isinstance(owner, np.ufunc) and getattr(func, "__name__", None) == "accumulate"
+        block_kwargs = {"axis": axis}
+        if use_dtype:
+            block_kwargs["dtype"] = dtype
+        # The partial does not depend on the block, so build it once
+        block_func = partial(func, **block_kwargs)
+        # Phase 1: apply the cumulative function to every block
+        dsk = {
+            (per_block_name,) + key: (block_func, (x.name,) + key)
+            for key in product(*map(range, x.numblocks))
+        }
+        # Phase 2: build the sequential combination along ``axis``
+        n = x.numblocks[axis]
+        full = slice(None, None, None)
+        # Selects the last slice of a block along ``axis`` (kept as length 1)
+        slc = (full,) * axis + (slice(-1, None),) + (full,) * (x.ndim - axis - 1)
+        def indices_at(pos):
+            # All block indices whose coordinate along ``axis`` is ``pos``
+            return list(product(*[range(nb) if j != axis else [pos] for j, nb in enumerate(x.numblocks)]))
+        indices = indices_at(0)
+        # Initialize "extra" values (accumulation of previous blocks) to identity
+        for ind in indices:
+            shape = tuple(x.chunks[i][ii] if i != axis else 1 for i, ii in enumerate(ind))
+            dsk[(self._name, "extra") + ind] = (
+                apply,
+                np.full_like,
+                (x._meta, ident, dtype),
+                {"shape": shape},
+            )
+            # First block along axis: just use per-block result
+            dsk[(self._name,) + ind] = (per_block_name,) + ind
+        # For subsequent blocks, merge in the accumulated total of previous blocks
+        for i in range(1, n):
+            last_indices, indices = indices, indices_at(i)
+            for old, ind in zip(last_indices, indices):
+                this_extra = (self._name, "extra") + ind
+                # Combine previous extra with the last slice of the previous block
+                dsk[this_extra] = (
+                    binop,
+                    (self._name, "extra") + old,
+                    (operator.getitem, (per_block_name,) + old, slc),
+                )
+                # Merge the extra into this block's result
+                dsk[(self._name,) + ind] = (
+                    binop,
+                    this_extra,
+                    (per_block_name,) + ind,
+                )
+        return dsk
+class CumReductionBlelloch(ArrayExpr):
+    """Expression for parallel cumulative reductions using Blelloch's algorithm.
+    This is a work-efficient parallel scan that uses O(log n) parallel steps.
+    The graph built by ``_layer`` contains three kinds of tasks: a per-block
+    ``preop`` reduction (keys named ``<name>-batch``), ``binop`` tasks that
+    combine those reductions during the upsweep and downsweep, and one output
+    task per block that applies ``func`` and, for every block after the first
+    along ``axis``, merges in the accumulated prefix with ``binop``.
+    """
+    # Operand names, in the order used by the constructor call in
+    # ``_cumreduction_expr``.
+    _parameters = ["array", "func", "preop", "binop", "axis", "_dtype"]
+    _defaults = {"_dtype": None}
+    @cached_property
+    def _name(self):
+        # "<function name>-<token of all operands>"
+        return f"{funcname(self.func)}-{_tokenize_deterministic(*self.operands)}"
+    @cached_property
+    def dtype(self):
+        """Output dtype: the explicit ``_dtype`` if given, else inferred."""
+        if self._dtype is not None:
+            return np.dtype(self._dtype)
+        # Infer dtype by running ``func`` on an empty array of the input dtype
+        return getattr(self.func(np.ones((0,), dtype=self.array.dtype)), "dtype", object)
+    @cached_property
+    def _meta(self):
+        # Return meta with the correct dtype
+        meta = self.array._meta
+        if hasattr(meta, "dtype") and meta.dtype != self.dtype:
+            return meta.astype(self.dtype)
+        return meta
+    @cached_property
+    def chunks(self):
+        # The output keeps the chunking of the input
+        return self.array.chunks
+    def _layer(self):
+        """Build the task graph of the Blelloch-style prefix scan.
+        Returns a dict mapping task keys to task tuples. Output blocks use
+        the keys ``(self._name, *block_index)``; intermediate combine tasks
+        use ``(self._name, *block_index, level, i)``.
+        """
+        # NOTE(review): presumably imported so ``max`` cannot be confused with
+        # an array-level ``max``; no such shadowing is visible in this file.
+        import builtins as py_builtins
+        x = self.array
+        axis = self.axis
+        func = self.func
+        preop = self.preop
+        binop = self.binop
+        dtype = self.dtype
+        base_key = (self._name,)
+        dsk = {}
+        # Phase 1: Compute prefix values (sum/product of each block)
+        batches_name = self._name + "-batch"
+        for key in product(*map(range, x.numblocks)):
+            # keepdims=True keeps the reduced axis in the per-block result
+            dsk[(batches_name,) + key] = (
+                partial(preop, axis=axis, keepdims=True),
+                (x.name,) + key,
+            )
+        # Build indices for each position along the axis
+        # full_indices[i] lists every block index whose coordinate along
+        # ``axis`` is ``i``.
+        full_indices = [
+            list(product(*[range(nb) if j != axis else [i] for j, nb in enumerate(x.numblocks)]))
+            for i in range(x.numblocks[axis])
+        ]
+        # No blocks along the axis: only the (empty) phase-1 graph is returned
+        if not full_indices:
+            return dsk
+        # The reduction of the last position is never needed as a prefix, so
+        # it is split off; ``last_index`` is not used again in this method.
+        *indices, last_index = full_indices
+        # prefix_vals[i] holds, per block, the key whose value currently
+        # stands for position ``i``; it starts as the phase-1 keys and is
+        # overwritten as the sweeps combine values.
+        prefix_vals = [[(batches_name,) + index for index in vals] for vals in indices]
+        n_vals = len(prefix_vals)
+        # ``level`` counts sweep rounds and keeps intermediate keys unique
+        level = 0
+        if n_vals >= 2:
+            # Upsweep
+            # Each round doubles the stride and combines position ``i`` with
+            # the position ``stride`` places before it.
+            stride = 1
+            stride2 = 2
+            while stride2 <= n_vals:
+                for i in range(stride2 - 1, n_vals, stride2):
+                    new_vals = []
+                    for index, left_val, right_val in zip(indices[i], prefix_vals[i - stride], prefix_vals[i]):
+                        key = base_key + index + (level, i)
+                        dsk[key] = (binop, left_val, right_val)
+                        new_vals.append(key)
+                    prefix_vals[i] = new_vals
+                stride = stride2
+                stride2 *= 2
+                level += 1
+            # Downsweep
+            # Starts from a power-of-two stride and halves it each round,
+            # combining position ``i`` with the position ``stride`` places
+            # before it until ``stride`` reaches zero.
+            stride2 = py_builtins.max(2, 2 ** math.ceil(math.log2(n_vals // 2)))
+            stride = stride2 // 2
+            while stride > 0:
+                for i in range(stride2 + stride - 1, n_vals, stride2):
+                    new_vals = []
+                    for index, left_val, right_val in zip(indices[i], prefix_vals[i - stride], prefix_vals[i]):
+                        key = base_key + index + (level, i)
+                        dsk[key] = (binop, left_val, right_val)
+                        new_vals.append(key)
+                    prefix_vals[i] = new_vals
+                stride2 = stride
+                stride //= 2
+                level += 1
+        # Phase 2: Apply cumulative function and combine with prefix sums
+        # First blocks: just apply the cumulative function
+        for index in full_indices[0]:
+            dsk[base_key + index] = (
+                _prefixscan_first,
+                func,
+                (x.name,) + index,
+                axis,
+                dtype,
+            )
+        # Remaining blocks: apply cumulative function and add prefix sum
+        # Position ``k + 1`` is paired with prefix_vals[k].
+        for indexes, vals in zip(full_indices[1:], prefix_vals):
+            for index, val in zip(indexes, vals):
+                dsk[base_key + index] = (
+                    _prefixscan_combine,
+                    func,
+                    binop,
+                    val,
+                    (x.name,) + index,
+                    axis,
+                    dtype,
+                )
+        return dsk
+def _cumreduction_expr(func, binop, ident, x, axis, dtype, out, method, preop):
+    """Build the expression for a cumulative reduction and wrap it in a collection.
+    ``x`` is converted to an ``Array`` if necessary, prepared with
+    ``_prepare_cumulative`` and ``axis`` is checked with ``validate_axis``.
+    ``method`` then selects ``CumReduction`` ("sequential") or
+    ``CumReductionBlelloch`` ("blelloch", which requires ``preop``). The new
+    collection is handed to ``_handle_out`` together with ``out`` and the
+    value that returns is the result.
+    Raises
+    ------
+    TypeError
+        If ``method`` is "blelloch" and ``preop`` is None.
+    ValueError
+        If ``method`` is neither "sequential" nor "blelloch".
+    """
+    from dask_array._collection import Array
+    from dask_array.core._blockwise_funcs import _handle_out
+    if not isinstance(x, Array):
+        from dask_array.core._conversion import asarray
+        x = asarray(x)
+    x, axis = _prepare_cumulative(x, axis)
+    axis = validate_axis(axis, x.ndim)
+    if method == "sequential":
+        expr = CumReduction(x.expr, func, binop, ident, axis, dtype)
+    elif method == "blelloch":
+        if preop is None:
+            raise TypeError('cumreduction with "blelloch" method requires `preop=` argument')
+        expr = CumReductionBlelloch(x.expr, func, preop, binop, axis, dtype)
+    else:
+        raise ValueError(f'Invalid method for cumreduction. Expected "sequential" or "blelloch". Got: {method!r}')
+    return _handle_out(out, new_collection(expr))
+def cumsum(x, axis=None, dtype=None, out=None, method="sequential"):
+    """Cumulative sum of the elements of ``x`` along ``axis``.
+    Parameters
+    ----------
+    x : array_like
+        Input array.
+    axis : int, optional
+        Axis to accumulate along. With the default (None) the cumulative
+        sum is taken over the flattened array.
+    dtype : dtype, optional
+        Type of the returned array and of the accumulator in which the
+        elements are summed.
+    out : optional
+        Forwarded unchanged to the shared cumulative-reduction helper.
+    method : {'sequential', 'blelloch'}, optional
+        Algorithm used for the cumulative sum. Default is 'sequential'.
+    Returns
+    -------
+    cumsum_along_axis : dask array
+        A new array holding the result.
+    """
+    # np.cumsum scans each block, _cumsum_merge joins partial results,
+    # 0 is the identity and np.sum is the per-block reduction for 'blelloch'.
+    return _cumreduction_expr(
+        func=np.cumsum,
+        binop=_cumsum_merge,
+        ident=0,
+        x=x,
+        axis=axis,
+        dtype=dtype,
+        out=out,
+        method=method,
+        preop=np.sum,
+    )
+def cumprod(x, axis=None, dtype=None, out=None, method="sequential"):
+    """Cumulative product of the elements of ``x`` along ``axis``.
+    Parameters
+    ----------
+    x : array_like
+        Input array.
+    axis : int, optional
+        Axis to accumulate along. With the default (None) the cumulative
+        product is taken over the flattened array.
+    dtype : dtype, optional
+        Type of the returned array and of the accumulator in which the
+        elements are multiplied.
+    out : optional
+        Forwarded unchanged to the shared cumulative-reduction helper.
+    method : {'sequential', 'blelloch'}, optional
+        Algorithm used for the cumulative product. Default is 'sequential'.
+    Returns
+    -------
+    cumprod_along_axis : dask array
+        A new array holding the result.
+    """
+    # np.cumprod scans each block, _cumprod_merge joins partial results,
+    # 1 is the identity and np.prod is the per-block reduction for 'blelloch'.
+    return _cumreduction_expr(
+        func=np.cumprod,
+        binop=_cumprod_merge,
+        ident=1,
+        x=x,
+        axis=axis,
+        dtype=dtype,
+        out=out,
+        method=method,
+        preop=np.prod,
+    )
+def nancumsum(x, axis, dtype=None, out=None, *, method="sequential"):
+    """Cumulative sum of array elements along ``axis``, treating NaNs as zero.
+    Parameters
+    ----------
+    x : array_like
+        Input array.
+    axis : int
+        Axis to accumulate along.
+    dtype : dtype, optional
+        Type of the returned array and of the accumulator in which the
+        elements are summed.
+    out : optional
+        Forwarded unchanged to the shared cumulative-reduction helper.
+    method : {'sequential', 'blelloch'}, optional
+        Algorithm used for the cumulative sum. Default is 'sequential'.
+        Keyword-only.
+    Returns
+    -------
+    nancumsum_along_axis : dask array
+        A new array holding the result.
+    """
+    from dask_array import _chunk as nan_chunk
+    # Partial results are joined with plain addition; np.nansum is the
+    # per-block reduction for 'blelloch'.
+    return _cumreduction_expr(
+        func=nan_chunk.nancumsum,
+        binop=operator.add,
+        ident=0,
+        x=x,
+        axis=axis,
+        dtype=dtype,
+        out=out,
+        method=method,
+        preop=np.nansum,
+    )
+def nancumprod(x, axis, dtype=None, out=None, *, method="sequential"):
+    """Cumulative product of array elements along ``axis``, treating NaNs as one.
+    Parameters
+    ----------
+    x : array_like
+        Input array.
+    axis : int
+        Axis to accumulate along.
+    dtype : dtype, optional
+        Type of the returned array and of the accumulator in which the
+        elements are multiplied.
+    out : optional
+        Forwarded unchanged to the shared cumulative-reduction helper.
+    method : {'sequential', 'blelloch'}, optional
+        Algorithm used for the cumulative product. Default is 'sequential'.
+        Keyword-only.
+    Returns
+    -------
+    nancumprod_along_axis : dask array
+        A new array holding the result.
+    """
+    from dask_array import _chunk as nan_chunk
+    # Partial results are joined with plain multiplication; np.nanprod is the
+    # per-block reduction for 'blelloch'.
+    return _cumreduction_expr(
+        func=nan_chunk.nancumprod,
+        binop=operator.mul,
+        ident=1,
+        x=x,
+        axis=axis,
+        dtype=dtype,
+        out=out,
+        method=method,
+        preop=np.nanprod,
+    )
+def cumreduction(
+    func,
+    binop,
+    ident,
+    x,
+    axis=None,
+    dtype=None,
+    out=None,
+    method="sequential",
+    preop=None,
+):
+    """Generic cumulative reduction. See dask.array.reductions.cumreduction.
+    Thin public wrapper: every argument is forwarded unchanged to
+    ``_cumreduction_expr`` and its result is returned.
+    """
+    return _cumreduction_expr(
+        func=func,
+        binop=binop,
+        ident=ident,
+        x=x,
+        axis=axis,
+        dtype=dtype,
+        out=out,
+        method=method,
+        preop=preop,
+    )