patchworks 0.8.0__tar.gz → 0.9.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {patchworks-0.8.0 → patchworks-0.9.0}/.github/workflows/lint.yml +10 -0
- patchworks-0.9.0/.markdownlint-cli2.yaml +16 -0
- {patchworks-0.8.0 → patchworks-0.9.0}/PKG-INFO +3 -2
- {patchworks-0.8.0 → patchworks-0.9.0}/README.md +2 -1
- {patchworks-0.8.0 → patchworks-0.9.0}/docs/examples/stardist.md +10 -10
- {patchworks-0.8.0 → patchworks-0.9.0}/docs/getting_started.md +36 -36
- {patchworks-0.8.0 → patchworks-0.9.0}/docs/guide/gpu_distributed.md +1 -1
- {patchworks-0.8.0 → patchworks-0.9.0}/docs/guide/merging.md +2 -2
- {patchworks-0.8.0 → patchworks-0.9.0}/docs/guide/ome_zarr_napari.md +22 -0
- {patchworks-0.8.0 → patchworks-0.9.0}/docs/guide/pitfalls.md +1 -1
- {patchworks-0.8.0 → patchworks-0.9.0}/docs/guide/skip_empty.md +1 -1
- {patchworks-0.8.0 → patchworks-0.9.0}/docs/guide/tiling.md +3 -2
- {patchworks-0.8.0 → patchworks-0.9.0}/docs/index.md +1 -1
- {patchworks-0.8.0 → patchworks-0.9.0}/src/patchworks/_chunks.py +16 -1
- {patchworks-0.8.0 → patchworks-0.9.0}/src/patchworks/_cluster.py +19 -2
- {patchworks-0.8.0 → patchworks-0.9.0}/src/patchworks/_core.py +41 -3
- {patchworks-0.8.0 → patchworks-0.9.0}/src/patchworks/_io.py +26 -1
- {patchworks-0.8.0 → patchworks-0.9.0}/src/patchworks/_merge.py +110 -1
- {patchworks-0.8.0 → patchworks-0.9.0}/src/patchworks/_relabel.py +10 -0
- {patchworks-0.8.0 → patchworks-0.9.0}/src/patchworks/plugins/cellpose.py +56 -2
- {patchworks-0.8.0 → patchworks-0.9.0}/src/patchworks/plugins/napari.py +85 -4
- {patchworks-0.8.0 → patchworks-0.9.0}/src/patchworks/plugins/ome_zarr.py +506 -25
- {patchworks-0.8.0 → patchworks-0.9.0}/tests/test_ome_zarr.py +39 -0
- {patchworks-0.8.0 → patchworks-0.9.0}/.github/workflows/docs.yml +0 -0
- {patchworks-0.8.0 → patchworks-0.9.0}/.github/workflows/release.yml +0 -0
- {patchworks-0.8.0 → patchworks-0.9.0}/.gitignore +0 -0
- {patchworks-0.8.0 → patchworks-0.9.0}/cliff.toml +0 -0
- {patchworks-0.8.0 → patchworks-0.9.0}/docs/api/chunks.md +0 -0
- {patchworks-0.8.0 → patchworks-0.9.0}/docs/api/cluster.md +0 -0
- {patchworks-0.8.0 → patchworks-0.9.0}/docs/api/io.md +0 -0
- {patchworks-0.8.0 → patchworks-0.9.0}/docs/api/merge_tile_labels.md +0 -0
- {patchworks-0.8.0 → patchworks-0.9.0}/docs/api/plugins/cellpose.md +0 -0
- {patchworks-0.8.0 → patchworks-0.9.0}/docs/api/plugins/napari.md +0 -0
- {patchworks-0.8.0 → patchworks-0.9.0}/docs/api/plugins/ome_zarr.md +0 -0
- {patchworks-0.8.0 → patchworks-0.9.0}/docs/api/relabel.md +0 -0
- {patchworks-0.8.0 → patchworks-0.9.0}/docs/api/tile_process.md +0 -0
- {patchworks-0.8.0 → patchworks-0.9.0}/docs/examples/cellpose_2d.md +0 -0
- {patchworks-0.8.0 → patchworks-0.9.0}/docs/examples/cellpose_2d.py +0 -0
- {patchworks-0.8.0 → patchworks-0.9.0}/docs/examples/cellpose_3d.md +0 -0
- {patchworks-0.8.0 → patchworks-0.9.0}/docs/examples/cellpose_3d.py +0 -0
- {patchworks-0.8.0 → patchworks-0.9.0}/docs/examples/custom.md +0 -0
- {patchworks-0.8.0 → patchworks-0.9.0}/docs/examples/custom_method.py +0 -0
- {patchworks-0.8.0 → patchworks-0.9.0}/docs/examples/standalone_merge.md +0 -0
- {patchworks-0.8.0 → patchworks-0.9.0}/docs/examples/stardist_2d.py +0 -0
- {patchworks-0.8.0 → patchworks-0.9.0}/docs/guide/performance.md +0 -0
- {patchworks-0.8.0 → patchworks-0.9.0}/mkdocs.yml +0 -0
- {patchworks-0.8.0 → patchworks-0.9.0}/pyproject.toml +0 -0
- {patchworks-0.8.0 → patchworks-0.9.0}/src/patchworks/__init__.py +0 -0
- {patchworks-0.8.0 → patchworks-0.9.0}/src/patchworks/plugins/__init__.py +0 -0
- {patchworks-0.8.0 → patchworks-0.9.0}/tests/test_core.py +0 -0
- {patchworks-0.8.0 → patchworks-0.9.0}/tests/test_napari.py +0 -0
|
@@ -23,3 +23,13 @@ jobs:
|
|
|
23
23
|
with:
|
|
24
24
|
version: "0.15.18"
|
|
25
25
|
args: "format --check"
|
|
26
|
+
|
|
27
|
+
markdownlint:
|
|
28
|
+
runs-on: ubuntu-latest
|
|
29
|
+
steps:
|
|
30
|
+
- uses: actions/checkout@v4
|
|
31
|
+
|
|
32
|
+
- name: markdownlint
|
|
33
|
+
uses: DavidAnson/markdownlint-cli2-action@v20
|
|
34
|
+
with:
|
|
35
|
+
globs: "**/*.md"
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# markdownlint-cli2 configuration — see https://github.com/DavidAnson/markdownlint
|
|
2
|
+
config:
|
|
3
|
+
MD013: false # line length (tables, code and long prose)
|
|
4
|
+
MD033: false # inline HTML (badges, <img>)
|
|
5
|
+
MD041: false # first line need not be a top-level heading (badges)
|
|
6
|
+
MD024:
|
|
7
|
+
siblings_only: true # repeated headings OK across different sections
|
|
8
|
+
MD046: false # allow both indented and fenced code blocks
|
|
9
|
+
MD060: false # don't enforce table pipe spacing style
|
|
10
|
+
globs:
|
|
11
|
+
- "**/*.md"
|
|
12
|
+
ignores:
|
|
13
|
+
- "site"
|
|
14
|
+
- ".pixi"
|
|
15
|
+
- "PUBLIC"
|
|
16
|
+
- "**/node_modules"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: patchworks
|
|
3
|
-
Version: 0.8.0
|
|
3
|
+
Version: 0.9.0
|
|
4
4
|
Summary: Tiled processing of arbitrarily large images with globally consistent labels
|
|
5
5
|
Project-URL: Homepage, https://github.com/imcf/patchworks
|
|
6
6
|
Project-URL: Issues, https://github.com/imcf/patchworks/issues
|
|
@@ -73,7 +73,7 @@ Description-Content-Type: text/markdown
|
|
|
73
73
|
|
|
74
74
|
> Tiled processing of arbitrarily large images — any image, any function.
|
|
75
75
|
|
|
76
|
-
```
|
|
76
|
+
```text
|
|
77
77
|
┌──────┬──────┬──────┐ fn(tile) → labels ┌──────┬──────┬──────┐
|
|
78
78
|
│ tile │ tile │ tile │ ─────────────────────► │ 1 │ 2 │ 3 │
|
|
79
79
|
├──────┼──────┼──────┤ ├──────┼──────┼──────┤
|
|
@@ -361,6 +361,7 @@ Full docs, guides and tutorials: **<https://imcf.one/patchworks/>**
|
|
|
361
361
|
- dask[array], numpy, zarr, scipy
|
|
362
362
|
|
|
363
363
|
Optional:
|
|
364
|
+
|
|
364
365
|
- `psutil` — accurate RAM sizing for `tile_shape="auto"`
|
|
365
366
|
- `nvidia-ml-py` — accurate GPU VRAM sizing
|
|
366
367
|
- `tqdm` — progress bars
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
|
|
8
8
|
> Tiled processing of arbitrarily large images — any image, any function.
|
|
9
9
|
|
|
10
|
-
```
|
|
10
|
+
```text
|
|
11
11
|
┌──────┬──────┬──────┐ fn(tile) → labels ┌──────┬──────┬──────┐
|
|
12
12
|
│ tile │ tile │ tile │ ─────────────────────► │ 1 │ 2 │ 3 │
|
|
13
13
|
├──────┼──────┼──────┤ ├──────┼──────┼──────┤
|
|
@@ -295,6 +295,7 @@ Full docs, guides and tutorials: **<https://imcf.one/patchworks/>**
|
|
|
295
295
|
- dask[array], numpy, zarr, scipy
|
|
296
296
|
|
|
297
297
|
Optional:
|
|
298
|
+
|
|
298
299
|
- `psutil` — accurate RAM sizing for `tile_shape="auto"`
|
|
299
300
|
- `nvidia-ml-py` — accurate GPU VRAM sizing
|
|
300
301
|
- `tqdm` — progress bars
|
|
@@ -47,18 +47,18 @@ tile_process(
|
|
|
47
47
|
Load the model **outside** the `fn` closure. If you load it inside,
|
|
48
48
|
it will be re-initialised (and potentially re-downloaded) once per tile.
|
|
49
49
|
|
|
50
|
-
|
|
50
|
+
For distributed execution, fetch the model through an `lru_cache`-wrapped helper so it is loaded once rather than once per tile:
|
|
51
51
|
|
|
52
|
-
|
|
53
|
-
|
|
52
|
+
```python
|
|
53
|
+
from functools import lru_cache
|
|
54
54
|
|
|
55
55
|
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
56
|
+
@lru_cache(maxsize=1)
|
|
57
|
+
def _get_model():
|
|
58
|
+
return StarDist2D.from_pretrained("2D_versatile_fluo")
|
|
59
59
|
|
|
60
60
|
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
61
|
+
def stardist_fn(tile):
|
|
62
|
+
model = _get_model()
|
|
63
|
+
...
|
|
64
|
+
```
|
|
@@ -46,11 +46,11 @@ patchworks can be installed from PyPI on all operating systems, for Python ≥ 3
|
|
|
46
46
|
|
|
47
47
|
## The one function you need
|
|
48
48
|
|
|
49
|
-
```python
|
|
50
|
-
from patchworks import tile_process
|
|
49
|
+
```python
|
|
50
|
+
from patchworks import tile_process
|
|
51
51
|
|
|
52
|
-
result = tile_process(image, fn)
|
|
53
|
-
```
|
|
52
|
+
result = tile_process(image, fn)
|
|
53
|
+
```
|
|
54
54
|
|
|
55
55
|
`tile_process(image, fn)` splits `image` into tiles, runs `fn` on each tile,
|
|
56
56
|
and returns a globally consistent label array.
|
|
@@ -65,17 +65,17 @@ and returns a globally consistent label array.
|
|
|
65
65
|
patchworks is method-agnostic. Your function receives a NumPy array (one tile)
|
|
66
66
|
and must return an integer label array of the same shape:
|
|
67
67
|
|
|
68
|
-
```python
|
|
69
|
-
import numpy as np
|
|
68
|
+
```python
|
|
69
|
+
import numpy as np
|
|
70
70
|
|
|
71
71
|
|
|
72
|
-
def my_fn(tile: np.ndarray) -> np.ndarray:
|
|
73
|
-
|
|
74
|
-
|
|
72
|
+
def my_fn(tile: np.ndarray) -> np.ndarray:
|
|
73
|
+
from skimage.filters import threshold_otsu
|
|
74
|
+
from skimage.measure import label
|
|
75
75
|
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
```
|
|
76
|
+
binary = tile > threshold_otsu(tile)
|
|
77
|
+
return label(binary).astype("int32")
|
|
78
|
+
```
|
|
79
79
|
|
|
80
80
|
The function is called independently on every tile. patchworks ensures that
|
|
81
81
|
objects spanning tile boundaries are merged into a single label.
|
|
@@ -155,14 +155,14 @@ objects spanning tile boundaries are merged into a single label.
|
|
|
155
155
|
Methods like Cellpose and StarDist need spatial context at tile boundaries.
|
|
156
156
|
Use `overlap` (in voxels) so boundary objects are fully visible:
|
|
157
157
|
|
|
158
|
-
```python
|
|
159
|
-
result = tile_process(
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
)
|
|
165
|
-
```
|
|
158
|
+
```python
|
|
159
|
+
result = tile_process(
|
|
160
|
+
"image.zarr",
|
|
161
|
+
my_fn,
|
|
162
|
+
tile_shape=(1, 2048, 2048),
|
|
163
|
+
overlap=20, # 20-voxel halo on every side
|
|
164
|
+
)
|
|
165
|
+
```
|
|
166
166
|
|
|
167
167
|
!!! info "How overlap works"
|
|
168
168
|
Each tile is expanded by `overlap` voxels on every side before calling `fn`.
|
|
@@ -173,22 +173,22 @@ result = tile_process(
|
|
|
173
173
|
|
|
174
174
|
## Use Cellpose
|
|
175
175
|
|
|
176
|
-
```python
|
|
177
|
-
from patchworks import tile_process
|
|
178
|
-
from patchworks.plugins.cellpose import cellpose_fn
|
|
179
|
-
|
|
180
|
-
fn = cellpose_fn("cyto3", gpu=True, diameter=30)
|
|
181
|
-
|
|
182
|
-
tile_process(
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
)
|
|
191
|
-
```
|
|
176
|
+
```python
|
|
177
|
+
from patchworks import tile_process
|
|
178
|
+
from patchworks.plugins.cellpose import cellpose_fn
|
|
179
|
+
|
|
180
|
+
fn = cellpose_fn("cyto3", gpu=True, diameter=30)
|
|
181
|
+
|
|
182
|
+
tile_process(
|
|
183
|
+
"image.zarr",
|
|
184
|
+
fn,
|
|
185
|
+
channel=0,
|
|
186
|
+
tile_shape=(1, 2048, 2048),
|
|
187
|
+
overlap=20,
|
|
188
|
+
write_to="labels.zarr",
|
|
189
|
+
progress=True,
|
|
190
|
+
)
|
|
191
|
+
```
|
|
192
192
|
|
|
193
193
|
See the [Cellpose 2-D example](examples/cellpose_2d.md) for the full workflow.
|
|
194
194
|
|
|
@@ -60,7 +60,7 @@ in the same process as the kernel. When your segmentation function holds the
|
|
|
60
60
|
Python GIL (every PyTorch/CUDA `eval` does), the worker thread can't send
|
|
61
61
|
heartbeats. The scheduler declares it dead, and the merge fails:
|
|
62
62
|
|
|
63
|
-
```
|
|
63
|
+
```text
|
|
64
64
|
FutureCancelledError: lost dependencies
|
|
65
65
|
```
|
|
66
66
|
|
|
@@ -9,7 +9,7 @@ even though it's the same cell.
|
|
|
9
9
|
|
|
10
10
|
patchworks solves this with a zarr-native merge algorithm:
|
|
11
11
|
|
|
12
|
-
```
|
|
12
|
+
```text
|
|
13
13
|
Tile A labels: Tile B labels: After merge:
|
|
14
14
|
┌────────────┐ ┌────────────┐ ┌──────────────────────┐
|
|
15
15
|
│ 3 1 2 │ │ 1 4 2 │ │ 3 1 2 │ 501 5 502│
|
|
@@ -32,7 +32,7 @@ Each tile's labels are written to a temporary zarr once. This is critical:
|
|
|
32
32
|
without staging, any downstream operation that reads the label array re-runs
|
|
33
33
|
your segmentation function. The merge internally reads labels multiple times.
|
|
34
34
|
|
|
35
|
-
```
|
|
35
|
+
```text
|
|
36
36
|
tile_process calls fn once per tile → staged zarr
|
|
37
37
|
│
|
|
38
38
|
merge reads from staged zarr (no fn calls)
|
|
@@ -76,6 +76,28 @@ and streaming the downsampled result out through dask with bounded chunks. The
|
|
|
76
76
|
graph never chains level-on-level and no whole plane/volume is held in RAM, so
|
|
77
77
|
terabyte images convert in bounded memory.
|
|
78
78
|
|
|
79
|
+
### Sharding (fewer files)
|
|
80
|
+
|
|
81
|
+
A big array becomes tens of thousands of tiny chunk files, which strain
|
|
82
|
+
filesystems and object stores. Sharding packs many chunks into one **shard**
|
|
83
|
+
file (zarr v3), cutting the file count ~100×:
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
to_ome_zarr("scan.ims", "scan.zarr", shard=True) # auto ~512 MB shards
|
|
87
|
+
to_ome_zarr("scan.ims", "scan.zarr", shard=(1, 16, 2048, 2048)) # explicit
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
Default is `shard=False` for maximum reader compatibility — sharding is
|
|
91
|
+
zarr-v3-only, so older tools may not be able to read it (a current zarr/napari stack can).
|
|
92
|
+
A sharded write holds ~one shard per worker in RAM, so very large shards cost
|
|
93
|
+
memory.
|
|
94
|
+
|
|
95
|
+
### Progress
|
|
96
|
+
|
|
97
|
+
All write steps show a dask progress bar **by default** (`progress=True`), so
|
|
98
|
+
you can see how long a conversion will take. Pass `progress=False` to silence
|
|
99
|
+
it.
|
|
100
|
+
|
|
79
101
|
!!! note "Install the readers you need"
|
|
80
102
|
`pip install "patchworks[bioio]"` pulls `bioio` plus the `bioio-bioformats`
|
|
81
103
|
catch-all reader (needs a JVM). For speed, add native readers for your
|
|
@@ -30,7 +30,7 @@ single-GPU runs — patchworks pins it to 1 thread automatically).
|
|
|
30
30
|
|
|
31
31
|
patchworks detects in-process clients at startup and raises immediately:
|
|
32
32
|
|
|
33
|
-
```
|
|
33
|
+
```python
|
|
34
34
|
RuntimeError: Active Dask client uses an in-process worker (processes=False).
|
|
35
35
|
This breaks the label merge when fn holds the GIL. Use a process-based
|
|
36
36
|
cluster instead:
|
|
@@ -13,6 +13,7 @@ peak RAM during segmentation is approximately one tile's worth of data.
|
|
|
13
13
|
## Choosing a tile size
|
|
14
14
|
|
|
15
15
|
The right tile size depends on:
|
|
16
|
+
|
|
16
17
|
- Your available RAM (or GPU VRAM)
|
|
17
18
|
- The minimum context your segmentation method needs (objects should fit fully
|
|
18
19
|
inside a tile, or you need overlap)
|
|
@@ -62,7 +63,7 @@ Methods that need spatial context (Cellpose, StarDist, U-Net) produce wrong
|
|
|
62
63
|
results near tile edges: objects at the boundary are cut off. Overlap fixes this
|
|
63
64
|
by expanding each tile by `overlap` voxels on every side.
|
|
64
65
|
|
|
65
|
-
```
|
|
66
|
+
```text
|
|
66
67
|
No overlap: With overlap=20:
|
|
67
68
|
┌──────────┐ ┌──────────────────┐
|
|
68
69
|
│ │ │ ░░░░░░░░░░░░░░ │
|
|
@@ -86,4 +87,4 @@ No overlap: With overlap=20:
|
|
|
86
87
|
automatically clips the depth per axis, so z-tiles of size 1 (typical in
|
|
87
88
|
2-D Cellpose mode) get `depth=0` in z even if you pass `overlap=20`.
|
|
88
89
|
|
|
89
|
-
|
|
90
|
+
Axes that are too small for the requested overlap simply get a smaller halo.
|
|
@@ -49,6 +49,14 @@ _GPU_MEMORY_FALLBACK = 8 * 1024**3
|
|
|
49
49
|
|
|
50
50
|
|
|
51
51
|
def _get_available_memory() -> int:
|
|
52
|
+
"""Return available system RAM in bytes.
|
|
53
|
+
|
|
54
|
+
Returns
|
|
55
|
+
-------
|
|
56
|
+
int
|
|
57
|
+
Available memory via ``psutil``, or an 8 GiB fallback if it is not
|
|
58
|
+
installed.
|
|
59
|
+
"""
|
|
52
60
|
try:
|
|
53
61
|
import psutil
|
|
54
62
|
|
|
@@ -103,7 +111,14 @@ def safe_worker_count(
|
|
|
103
111
|
|
|
104
112
|
|
|
105
113
|
def _get_gpu_memory() -> int:
|
|
106
|
-
"""Return free GPU VRAM in bytes.
|
|
114
|
+
"""Return free GPU VRAM in bytes.
|
|
115
|
+
|
|
116
|
+
Returns
|
|
117
|
+
-------
|
|
118
|
+
int
|
|
119
|
+
Free VRAM of GPU 0 via ``nvidia-ml-py``, or an 8 GiB fallback if the
|
|
120
|
+
query fails.
|
|
121
|
+
"""
|
|
107
122
|
try:
|
|
108
123
|
import pynvml
|
|
109
124
|
|
|
@@ -9,7 +9,14 @@ logger = logging.getLogger(__name__)
|
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
def _distributed_client():
|
|
12
|
-
"""Return the active dask.distributed Client, or None.
|
|
12
|
+
"""Return the active dask.distributed Client, or None.
|
|
13
|
+
|
|
14
|
+
Returns
|
|
15
|
+
-------
|
|
16
|
+
distributed.Client or None
|
|
17
|
+
The current client, or ``None`` if none is active / distributed is not
|
|
18
|
+
installed.
|
|
19
|
+
"""
|
|
13
20
|
try:
|
|
14
21
|
from dask.distributed import get_client
|
|
15
22
|
|
|
@@ -19,12 +26,22 @@ def _distributed_client():
|
|
|
19
26
|
|
|
20
27
|
|
|
21
28
|
def _client_is_in_process(client) -> bool:
|
|
22
|
-
"""
|
|
29
|
+
"""Whether *client* runs its worker in this process (``processes=False``).
|
|
23
30
|
|
|
24
31
|
An in-process worker shares the GIL. A long task that holds the GIL
|
|
25
32
|
(e.g. a Cellpose/torch eval) starves the worker heartbeat, the scheduler
|
|
26
33
|
declares it dead, and the P2P merge barrier drops its inputs →
|
|
27
34
|
"FutureCancelledError: lost dependencies".
|
|
35
|
+
|
|
36
|
+
Parameters
|
|
37
|
+
----------
|
|
38
|
+
client : distributed.Client
|
|
39
|
+
The client to inspect.
|
|
40
|
+
|
|
41
|
+
Returns
|
|
42
|
+
-------
|
|
43
|
+
bool
|
|
44
|
+
True if any worker address uses the ``inproc://`` transport.
|
|
28
45
|
"""
|
|
29
46
|
try:
|
|
30
47
|
for addr in client.scheduler_info().get("workers", {}):
|
|
@@ -23,7 +23,23 @@ logger = logging.getLogger(__name__)
|
|
|
23
23
|
def _stage_to_zarr(
|
|
24
24
|
arr: da.Array, path: str, component: str, show_progress: bool
|
|
25
25
|
) -> None:
|
|
26
|
-
"""Write *arr* to zarr
|
|
26
|
+
"""Write *arr* to zarr ``path/component``, never loading it into RAM.
|
|
27
|
+
|
|
28
|
+
Parameters
|
|
29
|
+
----------
|
|
30
|
+
arr : da.Array
|
|
31
|
+
Array to materialise to disk.
|
|
32
|
+
path : str
|
|
33
|
+
Zarr store path.
|
|
34
|
+
component : str
|
|
35
|
+
Array name within the store.
|
|
36
|
+
show_progress : bool
|
|
37
|
+
Show a progress bar while computing.
|
|
38
|
+
|
|
39
|
+
Returns
|
|
40
|
+
-------
|
|
41
|
+
None
|
|
42
|
+
"""
|
|
27
43
|
import dask
|
|
28
44
|
|
|
29
45
|
lazy_write = arr.to_zarr(
|
|
@@ -57,7 +73,7 @@ def tile_process(
|
|
|
57
73
|
level: int = 0,
|
|
58
74
|
use_gpu: bool = False,
|
|
59
75
|
max_workers: int | None = None,
|
|
60
|
-
progress: bool =
|
|
76
|
+
progress: bool = True,
|
|
61
77
|
write_to: Union[str, Path, None] = None,
|
|
62
78
|
output_component: str = "labels",
|
|
63
79
|
pyramid_levels: int = 5,
|
|
@@ -123,7 +139,8 @@ def tile_process(
|
|
|
123
139
|
every core. Ignored when a distributed client is active (it manages its
|
|
124
140
|
own concurrency).
|
|
125
141
|
progress:
|
|
126
|
-
Show
|
|
142
|
+
Show progress bars for staging, the label write and the pyramid
|
|
143
|
+
(default ``True``). Set ``False`` to silence them.
|
|
127
144
|
write_to:
|
|
128
145
|
Explicit output zarr store path. Overrides the default behaviour: the
|
|
129
146
|
merged labels are written here as a single-resolution array named
|
|
@@ -297,6 +314,20 @@ def tile_process(
|
|
|
297
314
|
_skip_thr = _auto_empty_threshold(image_for_threshold, channel, level)
|
|
298
315
|
|
|
299
316
|
def active_fn(block, block_info=None):
|
|
317
|
+
"""Run *fn* on one tile, or return zeros for an empty tile.
|
|
318
|
+
|
|
319
|
+
Parameters
|
|
320
|
+
----------
|
|
321
|
+
block : np.ndarray
|
|
322
|
+
One image tile.
|
|
323
|
+
block_info : dict or None
|
|
324
|
+
Dask block metadata (used for logging the tile location).
|
|
325
|
+
|
|
326
|
+
Returns
|
|
327
|
+
-------
|
|
328
|
+
np.ndarray
|
|
329
|
+
Integer labels, or an all-zero tile when skipped.
|
|
330
|
+
"""
|
|
300
331
|
loc = block_info[0].get("chunk-location") if block_info else "?"
|
|
301
332
|
if skip_empty and block.size and block.max() <= _skip_thr:
|
|
302
333
|
if verbose:
|
|
@@ -398,6 +429,12 @@ def tile_process(
|
|
|
398
429
|
# that figure instead.
|
|
399
430
|
|
|
400
431
|
def _cleanup_stage():
|
|
432
|
+
"""Delete the temporary stage store unless ``keep_stage`` is set.
|
|
433
|
+
|
|
434
|
+
Returns
|
|
435
|
+
-------
|
|
436
|
+
None
|
|
437
|
+
"""
|
|
401
438
|
if not keep_stage:
|
|
402
439
|
import shutil
|
|
403
440
|
|
|
@@ -457,6 +494,7 @@ def tile_process(
|
|
|
457
494
|
name=output_component,
|
|
458
495
|
n_levels=pyramid_levels,
|
|
459
496
|
downscale=pyramid_downscale,
|
|
497
|
+
progress=progress,
|
|
460
498
|
overwrite=True,
|
|
461
499
|
)
|
|
462
500
|
shutil.rmtree(os.path.dirname(_merge_out), ignore_errors=True)
|
|
@@ -75,6 +75,16 @@ def _otsu_threshold(sample: np.ndarray) -> float:
|
|
|
75
75
|
|
|
76
76
|
Operates on the full distribution including zeros — zeros are background
|
|
77
77
|
pixels and must be included so Otsu can find the signal/background boundary.
|
|
78
|
+
|
|
79
|
+
Parameters
|
|
80
|
+
----------
|
|
81
|
+
sample : np.ndarray
|
|
82
|
+
Flat intensity sample.
|
|
83
|
+
|
|
84
|
+
Returns
|
|
85
|
+
-------
|
|
86
|
+
float
|
|
87
|
+
The Otsu threshold, or ``0.0`` when the sample is degenerate.
|
|
78
88
|
"""
|
|
79
89
|
try:
|
|
80
90
|
from skimage.filters import threshold_otsu
|
|
@@ -89,7 +99,22 @@ def _otsu_threshold(sample: np.ndarray) -> float:
|
|
|
89
99
|
def _auto_empty_threshold(
|
|
90
100
|
image: da.Array, channel: int | None, level: int
|
|
91
101
|
) -> float:
|
|
92
|
-
"""Pick an empty-tile threshold from a cheap bounded sample (Otsu).
|
|
102
|
+
"""Pick an empty-tile threshold from a cheap bounded sample (Otsu).
|
|
103
|
+
|
|
104
|
+
Parameters
|
|
105
|
+
----------
|
|
106
|
+
image : da.Array
|
|
107
|
+
Image to sample.
|
|
108
|
+
channel : int or None
|
|
109
|
+
Channel hint (kept for signature symmetry).
|
|
110
|
+
level : int
|
|
111
|
+
Pyramid level hint (kept for signature symmetry).
|
|
112
|
+
|
|
113
|
+
Returns
|
|
114
|
+
-------
|
|
115
|
+
float
|
|
116
|
+
Otsu threshold over a few small centred windows.
|
|
117
|
+
"""
|
|
93
118
|
n = image.ndim
|
|
94
119
|
win = [min(64 if i >= n - 3 else s, s) for i, s in enumerate(image.shape)]
|
|
95
120
|
win = [min(w, 256) if i >= n - 2 else w for i, w in enumerate(win)]
|
|
@@ -55,6 +55,25 @@ _merge_out_comp: "str | None" = None
|
|
|
55
55
|
|
|
56
56
|
|
|
57
57
|
def _init_worker(lut_path, staged_path, staged_comp, out_path, out_comp):
|
|
58
|
+
"""Initialise a merge worker process with the shared paths and LUT.
|
|
59
|
+
|
|
60
|
+
Parameters
|
|
61
|
+
----------
|
|
62
|
+
lut_path : str
|
|
63
|
+
Path to the relabel lookup table (loaded memory-mapped, read-only).
|
|
64
|
+
staged_path : str
|
|
65
|
+
Path to the staged-labels zarr store.
|
|
66
|
+
staged_comp : str
|
|
67
|
+
Component name within the staged store.
|
|
68
|
+
out_path : str
|
|
69
|
+
Path to the output zarr store.
|
|
70
|
+
out_comp : str
|
|
71
|
+
Component name within the output store.
|
|
72
|
+
|
|
73
|
+
Returns
|
|
74
|
+
-------
|
|
75
|
+
None
|
|
76
|
+
"""
|
|
58
77
|
global _merge_lut, _merge_lut_path, _merge_staged_path, _merge_staged_comp
|
|
59
78
|
global _merge_out_path, _merge_out_comp
|
|
60
79
|
_merge_lut = np.load(
|
|
@@ -68,6 +87,17 @@ def _init_worker(lut_path, staged_path, staged_comp, out_path, out_comp):
|
|
|
68
87
|
|
|
69
88
|
|
|
70
89
|
def _relabel_chunk_worker(chunk_slice: tuple) -> None:
|
|
90
|
+
"""Apply the relabel LUT to one chunk and write it to the output store.
|
|
91
|
+
|
|
92
|
+
Parameters
|
|
93
|
+
----------
|
|
94
|
+
chunk_slice : tuple
|
|
95
|
+
The slice selecting this chunk in both stores.
|
|
96
|
+
|
|
97
|
+
Returns
|
|
98
|
+
-------
|
|
99
|
+
None
|
|
100
|
+
"""
|
|
71
101
|
src = zarr.open_group(_merge_staged_path, mode="r")[_merge_staged_comp]
|
|
72
102
|
dst = zarr.open_group(_merge_out_path, mode="r+")[_merge_out_comp]
|
|
73
103
|
block = np.asarray(src[chunk_slice], dtype=np.int64)
|
|
@@ -87,6 +117,20 @@ def _relabel_chunk_worker(chunk_slice: tuple) -> None:
|
|
|
87
117
|
def _boundary_face_specs(
|
|
88
118
|
shape: tuple[int, ...], chunk_shape: tuple[int, ...]
|
|
89
119
|
) -> list[tuple[int, int]]:
|
|
120
|
+
"""Enumerate interior chunk boundaries to scan for touching labels.
|
|
121
|
+
|
|
122
|
+
Parameters
|
|
123
|
+
----------
|
|
124
|
+
shape : tuple of int
|
|
125
|
+
Array shape.
|
|
126
|
+
chunk_shape : tuple of int
|
|
127
|
+
Chunk shape.
|
|
128
|
+
|
|
129
|
+
Returns
|
|
130
|
+
-------
|
|
131
|
+
list of tuple of int
|
|
132
|
+
``(axis, position)`` pairs, one per interior chunk boundary.
|
|
133
|
+
"""
|
|
90
134
|
specs = []
|
|
91
135
|
for ax, (s, cs) in enumerate(zip(shape, chunk_shape)):
|
|
92
136
|
pos = cs
|
|
@@ -105,6 +149,21 @@ def _scan_touching_pairs(
|
|
|
105
149
|
is bounded to one chunk (~200 MB). Reading the full face at once
|
|
106
150
|
(slice(None) on face axes) would allocate face_area × 8 bytes in one shot —
|
|
107
151
|
e.g. 37888 × 27392 × 8 = 8 GiB for a single z-face (OOM on real datasets).
|
|
152
|
+
|
|
153
|
+
Parameters
|
|
154
|
+
----------
|
|
155
|
+
zarr_path : str
|
|
156
|
+
Path to the staged-labels zarr store.
|
|
157
|
+
component : str
|
|
158
|
+
Component name within the store.
|
|
159
|
+
chunk_shape : tuple of int
|
|
160
|
+
Chunk shape (sets the per-read column size).
|
|
161
|
+
|
|
162
|
+
Returns
|
|
163
|
+
-------
|
|
164
|
+
np.ndarray
|
|
165
|
+
``(N, 2)`` int64 array of unique label pairs touching across a
|
|
166
|
+
boundary.
|
|
108
167
|
"""
|
|
109
168
|
root = zarr.open_group(zarr_path, mode="r")
|
|
110
169
|
arr = root[component]
|
|
@@ -135,7 +194,20 @@ def _scan_touching_pairs(
|
|
|
135
194
|
|
|
136
195
|
|
|
137
196
|
def _build_relabel_lut(pairs: np.ndarray, max_label: int) -> np.ndarray:
|
|
138
|
-
"""
|
|
197
|
+
"""Build a relabel LUT from touching pairs via connected components.
|
|
198
|
+
|
|
199
|
+
Parameters
|
|
200
|
+
----------
|
|
201
|
+
pairs : np.ndarray
|
|
202
|
+
``(N, 2)`` array of touching label pairs.
|
|
203
|
+
max_label : int
|
|
204
|
+
Largest label id present.
|
|
205
|
+
|
|
206
|
+
Returns
|
|
207
|
+
-------
|
|
208
|
+
np.ndarray
|
|
209
|
+
Lookup table mapping each old label to its merged (component) id.
|
|
210
|
+
"""
|
|
139
211
|
if max_label > _LUT_WARN_THRESHOLD:
|
|
140
212
|
logger.warning(
|
|
141
213
|
"_build_relabel_lut: max_label=%d → LUT ~%.0f MB. "
|
|
@@ -168,6 +240,24 @@ def _build_relabel_lut(pairs: np.ndarray, max_label: int) -> np.ndarray:
|
|
|
168
240
|
def _create_zarr_label_array(
|
|
169
241
|
group: zarr.Group, name: str, shape: tuple, chunks: tuple
|
|
170
242
|
) -> zarr.Array:
|
|
243
|
+
"""Create (replacing any existing) an int32 label array in *group*.
|
|
244
|
+
|
|
245
|
+
Parameters
|
|
246
|
+
----------
|
|
247
|
+
group : zarr.Group
|
|
248
|
+
Parent group.
|
|
249
|
+
name : str
|
|
250
|
+
Array name (may be a nested path).
|
|
251
|
+
shape : tuple
|
|
252
|
+
Array shape.
|
|
253
|
+
chunks : tuple
|
|
254
|
+
Chunk shape.
|
|
255
|
+
|
|
256
|
+
Returns
|
|
257
|
+
-------
|
|
258
|
+
zarr.Array
|
|
259
|
+
The newly created array (works on zarr v2 and v3).
|
|
260
|
+
"""
|
|
171
261
|
if name in group:
|
|
172
262
|
del group[name]
|
|
173
263
|
if _ZARR_V3:
|
|
@@ -192,6 +282,25 @@ def zarr_native_merge(
|
|
|
192
282
|
Scales to 2000+ chunks where the dask_image approach stalls (O(n_chunks²)
|
|
193
283
|
graph). Reads *staged_path/staged_component*, merges touching cross-boundary
|
|
194
284
|
labels, writes result to *out_path/out_component*. No dask task graph.
|
|
285
|
+
|
|
286
|
+
Parameters
|
|
287
|
+
----------
|
|
288
|
+
staged_path : str
|
|
289
|
+
Path to the staged-labels zarr store.
|
|
290
|
+
staged_component : str
|
|
291
|
+
Component name within the staged store.
|
|
292
|
+
out_path : str
|
|
293
|
+
Path to the output zarr store.
|
|
294
|
+
out_component : str
|
|
295
|
+
Component name within the output store.
|
|
296
|
+
n_workers : int
|
|
297
|
+
Number of worker processes for the parallel relabel.
|
|
298
|
+
show_progress : bool
|
|
299
|
+
Show a progress bar over the relabel chunks.
|
|
300
|
+
|
|
301
|
+
Returns
|
|
302
|
+
-------
|
|
303
|
+
None
|
|
195
304
|
"""
|
|
196
305
|
root = zarr.open_group(staged_path, mode="r")
|
|
197
306
|
arr = root[staged_component]
|