dask-array 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dask_array/__init__.py +228 -0
- dask_array/_backends.py +76 -0
- dask_array/_backends_array.py +99 -0
- dask_array/_blockwise.py +1410 -0
- dask_array/_broadcast.py +272 -0
- dask_array/_chunk.py +445 -0
- dask_array/_chunk_types.py +54 -0
- dask_array/_collection.py +1644 -0
- dask_array/_concatenate.py +331 -0
- dask_array/_core_utils.py +1365 -0
- dask_array/_dispatch.py +141 -0
- dask_array/_einsum.py +277 -0
- dask_array/_expr.py +544 -0
- dask_array/_expr_flow.py +586 -0
- dask_array/_gufunc.py +805 -0
- dask_array/_histogram.py +617 -0
- dask_array/_map_blocks.py +652 -0
- dask_array/_new_collection.py +10 -0
- dask_array/_numpy_compat.py +135 -0
- dask_array/_overlap.py +1159 -0
- dask_array/_rechunk.py +1050 -0
- dask_array/_reshape.py +710 -0
- dask_array/_routines.py +102 -0
- dask_array/_shuffle.py +448 -0
- dask_array/_stack.py +264 -0
- dask_array/_svg.py +291 -0
- dask_array/_templates.py +29 -0
- dask_array/_test_utils.py +257 -0
- dask_array/_ufunc.py +385 -0
- dask_array/_utils.py +349 -0
- dask_array/_visualize.py +223 -0
- dask_array/_xarray.py +337 -0
- dask_array/core/__init__.py +34 -0
- dask_array/core/_blockwise_funcs.py +312 -0
- dask_array/core/_conversion.py +422 -0
- dask_array/core/_from_graph.py +97 -0
- dask_array/creation/__init__.py +71 -0
- dask_array/creation/_arange.py +121 -0
- dask_array/creation/_diag.py +116 -0
- dask_array/creation/_diagonal.py +241 -0
- dask_array/creation/_eye.py +103 -0
- dask_array/creation/_linspace.py +102 -0
- dask_array/creation/_mesh.py +134 -0
- dask_array/creation/_ones_zeros.py +454 -0
- dask_array/creation/_pad.py +270 -0
- dask_array/creation/_repeat.py +55 -0
- dask_array/creation/_tile.py +36 -0
- dask_array/creation/_tri.py +28 -0
- dask_array/creation/_utils.py +296 -0
- dask_array/fft.py +320 -0
- dask_array/io/__init__.py +39 -0
- dask_array/io/_base.py +10 -0
- dask_array/io/_from_array.py +257 -0
- dask_array/io/_from_delayed.py +95 -0
- dask_array/io/_from_graph.py +54 -0
- dask_array/io/_from_npy_stack.py +67 -0
- dask_array/io/_store.py +336 -0
- dask_array/io/_tiledb.py +159 -0
- dask_array/io/_to_npy_stack.py +65 -0
- dask_array/io/_zarr.py +449 -0
- dask_array/linalg/__init__.py +39 -0
- dask_array/linalg/_cholesky.py +234 -0
- dask_array/linalg/_lu.py +300 -0
- dask_array/linalg/_norm.py +94 -0
- dask_array/linalg/_qr.py +601 -0
- dask_array/linalg/_solve.py +349 -0
- dask_array/linalg/_svd.py +394 -0
- dask_array/linalg/_tensordot.py +334 -0
- dask_array/linalg/_utils.py +74 -0
- dask_array/manipulation/__init__.py +45 -0
- dask_array/manipulation/_expand.py +321 -0
- dask_array/manipulation/_flip.py +92 -0
- dask_array/manipulation/_roll.py +78 -0
- dask_array/manipulation/_transpose.py +309 -0
- dask_array/random/__init__.py +125 -0
- dask_array/random/_choice.py +181 -0
- dask_array/random/_expr.py +256 -0
- dask_array/random/_generator.py +441 -0
- dask_array/random/_random_state.py +259 -0
- dask_array/random/_utils.py +84 -0
- dask_array/reductions/__init__.py +84 -0
- dask_array/reductions/_arg_reduction.py +130 -0
- dask_array/reductions/_common.py +1082 -0
- dask_array/reductions/_cumulative.py +522 -0
- dask_array/reductions/_percentile.py +261 -0
- dask_array/reductions/_reduction.py +725 -0
- dask_array/reductions/_trace.py +56 -0
- dask_array/routines/__init__.py +133 -0
- dask_array/routines/_apply.py +84 -0
- dask_array/routines/_bincount.py +112 -0
- dask_array/routines/_broadcast.py +111 -0
- dask_array/routines/_coarsen.py +115 -0
- dask_array/routines/_diff.py +79 -0
- dask_array/routines/_gradient.py +158 -0
- dask_array/routines/_indexing.py +65 -0
- dask_array/routines/_insert_delete.py +132 -0
- dask_array/routines/_misc.py +122 -0
- dask_array/routines/_nonzero.py +72 -0
- dask_array/routines/_search.py +123 -0
- dask_array/routines/_select.py +113 -0
- dask_array/routines/_statistics.py +171 -0
- dask_array/routines/_topk.py +82 -0
- dask_array/routines/_triangular.py +74 -0
- dask_array/routines/_unique.py +232 -0
- dask_array/routines/_where.py +62 -0
- dask_array/slicing/__init__.py +67 -0
- dask_array/slicing/_basic.py +550 -0
- dask_array/slicing/_blocks.py +138 -0
- dask_array/slicing/_bool_index.py +145 -0
- dask_array/slicing/_setitem.py +329 -0
- dask_array/slicing/_squeeze.py +101 -0
- dask_array/slicing/_utils.py +1133 -0
- dask_array/slicing/_vindex.py +282 -0
- dask_array/stacking/__init__.py +15 -0
- dask_array/stacking/_block.py +83 -0
- dask_array/stacking/_simple.py +58 -0
- dask_array/templates/array.html.j2 +48 -0
- dask_array/tests/__init__.py +0 -0
- dask_array/tests/conftest.py +22 -0
- dask_array/tests/test_api.py +40 -0
- dask_array/tests/test_binary_op_chunks.py +107 -0
- dask_array/tests/test_coarse_slice_through_blockwise.py +362 -0
- dask_array/tests/test_collection.py +799 -0
- dask_array/tests/test_creation.py +1102 -0
- dask_array/tests/test_expr_flow.py +143 -0
- dask_array/tests/test_linalg.py +1130 -0
- dask_array/tests/test_map_blocks_multi_output.py +104 -0
- dask_array/tests/test_rechunk_pushdown.py +214 -0
- dask_array/tests/test_reductions.py +1091 -0
- dask_array/tests/test_routines.py +2853 -0
- dask_array/tests/test_shuffle_chunks.py +67 -0
- dask_array/tests/test_slice_pushdown.py +968 -0
- dask_array/tests/test_slice_through_blockwise.py +678 -0
- dask_array/tests/test_slice_through_overlap.py +366 -0
- dask_array/tests/test_slice_through_reshape.py +272 -0
- dask_array/tests/test_slicing.py +839 -0
- dask_array/tests/test_transpose_slice_pushdown.py +208 -0
- dask_array/tests/test_visualize.py +94 -0
- dask_array/tests/test_xarray.py +193 -0
- dask_array-0.1.0.dist-info/METADATA +48 -0
- dask_array-0.1.0.dist-info/RECORD +144 -0
- dask_array-0.1.0.dist-info/WHEEL +4 -0
- dask_array-0.1.0.dist-info/entry_points.txt +2 -0
- dask_array-0.1.0.dist-info/licenses/LICENSE +29 -0
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
"""Tests for shuffle output chunk sizing with input chunk locality grouping."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
import pytest
|
|
7
|
+
|
|
8
|
+
import dask_array as da
|
|
9
|
+
from dask_array._test_utils import assert_eq
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def test_contiguous_indexing_splits_to_input_chunk_size():
    """Repeat-style indexer: row chunks of the result are capped at 25 rows."""
    n_rows, n_cols = 100, 10
    rows_per_chunk = 25  # 100 rows / 25 -> 4 input chunks along axis 0
    repeats = 3

    source = np.arange(n_rows * n_cols).reshape(n_rows, n_cols)
    darr = da.from_array(source, chunks=(rows_per_chunk, n_cols))

    # Contiguous access pattern: [0, 0, 0, 1, 1, 1, ..., 99, 99, 99].
    # The 25 rows of one input chunk cover 75 consecutive output rows; the
    # test expects those to be cut into 3 pieces of 25, i.e. 12 pieces total.
    row_ids = np.repeat(np.arange(n_rows), repeats)
    picked = darr[row_ids, :]

    row_chunks = picked.chunks[0]
    assert max(row_chunks) == 25
    assert picked.numblocks[0] == 12  # 4 input chunks * 3 splits each
    assert_eq(picked, source[row_ids, :])
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def test_scattered_indexing_correctness():
    """Tile-style indexer: the result must match NumPy for scattered access."""
    n_rows, n_cols = 100, 10
    source = np.arange(n_rows * n_cols).reshape(n_rows, n_cols)
    darr = da.from_array(source, chunks=(25, n_cols))

    # Scattered access pattern: [0, 1, ..., 99, 0, 1, ..., 99, 0, 1, ..., 99].
    row_ids = np.tile(np.arange(n_rows), 3)
    picked = darr[row_ids, :]

    # Only values are checked here; no expectation is placed on chunk layout.
    assert_eq(picked, source[row_ids, :])
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def test_identity_indexing_no_shuffle():
    """Indexing with arange over every row must not yield a Shuffle expr."""
    from dask_array._shuffle import Shuffle

    n_rows, n_cols = 12, 10
    source = np.arange(n_rows * n_cols).reshape(n_rows, n_cols)
    darr = da.from_array(source, chunks=(3, n_cols))

    # Selects rows 0..11 in their original order, so the data is unchanged.
    identity_ids = np.arange(n_rows)
    picked = darr[identity_ids, :]

    assert not isinstance(picked.expr, Shuffle)
    assert_eq(picked, source)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def test_large_repeat_splits_oversized_groups():
    """A large repeat factor must not produce oversized output chunks.

    Every row is selected 100 times in a row.  The output row chunks are
    expected to be capped at the input chunk size (25) rather than growing
    with the repeat factor.
    """
    n_rows, n_cols = 100, 10
    rows_per_chunk = 25  # 4 input chunks of 25 rows each
    repeats = 100

    source = np.arange(n_rows * n_cols).reshape(n_rows, n_cols)
    darr = da.from_array(source, chunks=(rows_per_chunk, n_cols))

    # The rows of one input chunk span 25 * 100 = 2500 consecutive output
    # rows; the test requires that span to be split into chunks of 25.
    row_ids = np.repeat(np.arange(n_rows), repeats)
    picked = darr[row_ids, :]

    largest_row_chunk = max(picked.chunks[0])
    assert largest_row_chunk == 25
    assert_eq(picked, source[row_ids, :])
|