patchworks 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- patchworks-0.2.0/.github/workflows/docs.yml +26 -0
- patchworks-0.2.0/.github/workflows/release.yml +47 -0
- patchworks-0.2.0/.gitignore +9 -0
- patchworks-0.2.0/PKG-INFO +294 -0
- patchworks-0.2.0/README.md +248 -0
- patchworks-0.2.0/cliff.toml +49 -0
- patchworks-0.2.0/docs/api/chunks.md +5 -0
- patchworks-0.2.0/docs/api/cluster.md +3 -0
- patchworks-0.2.0/docs/api/io.md +5 -0
- patchworks-0.2.0/docs/api/merge_tile_labels.md +3 -0
- patchworks-0.2.0/docs/api/plugins/cellpose.md +3 -0
- patchworks-0.2.0/docs/api/relabel.md +5 -0
- patchworks-0.2.0/docs/api/tile_process.md +3 -0
- patchworks-0.2.0/docs/examples/cellpose_2d.md +71 -0
- patchworks-0.2.0/docs/examples/cellpose_2d.py +29 -0
- patchworks-0.2.0/docs/examples/cellpose_3d.md +68 -0
- patchworks-0.2.0/docs/examples/cellpose_3d.py +47 -0
- patchworks-0.2.0/docs/examples/custom.md +84 -0
- patchworks-0.2.0/docs/examples/custom_method.py +47 -0
- patchworks-0.2.0/docs/examples/standalone_merge.md +68 -0
- patchworks-0.2.0/docs/examples/stardist.md +61 -0
- patchworks-0.2.0/docs/examples/stardist_2d.py +42 -0
- patchworks-0.2.0/docs/getting_started.md +197 -0
- patchworks-0.2.0/docs/guide/gpu_distributed.md +78 -0
- patchworks-0.2.0/docs/guide/merging.md +106 -0
- patchworks-0.2.0/docs/guide/pitfalls.md +123 -0
- patchworks-0.2.0/docs/guide/skip_empty.md +88 -0
- patchworks-0.2.0/docs/guide/tiling.md +89 -0
- patchworks-0.2.0/docs/index.md +87 -0
- patchworks-0.2.0/mkdocs.yml +84 -0
- patchworks-0.2.0/pyproject.toml +71 -0
- patchworks-0.2.0/src/patchworks/__init__.py +48 -0
- patchworks-0.2.0/src/patchworks/_chunks.py +258 -0
- patchworks-0.2.0/src/patchworks/_cluster.py +93 -0
- patchworks-0.2.0/src/patchworks/_core.py +352 -0
- patchworks-0.2.0/src/patchworks/_io.py +218 -0
- patchworks-0.2.0/src/patchworks/_merge.py +405 -0
- patchworks-0.2.0/src/patchworks/_relabel.py +83 -0
- patchworks-0.2.0/src/patchworks/plugins/__init__.py +1 -0
- patchworks-0.2.0/src/patchworks/plugins/cellpose.py +188 -0
- patchworks-0.2.0/tests/test_core.py +227 -0
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
name: Deploy docs
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches:
|
|
6
|
+
- main
|
|
7
|
+
|
|
8
|
+
permissions:
|
|
9
|
+
contents: write
|
|
10
|
+
|
|
11
|
+
jobs:
|
|
12
|
+
deploy:
|
|
13
|
+
runs-on: ubuntu-latest
|
|
14
|
+
steps:
|
|
15
|
+
- uses: actions/checkout@v4
|
|
16
|
+
|
|
17
|
+
- uses: actions/setup-python@v5
|
|
18
|
+
with:
|
|
19
|
+
python-version: "3.12"
|
|
20
|
+
cache: pip
|
|
21
|
+
|
|
22
|
+
- name: Install docs dependencies
|
|
23
|
+
run: pip install "mkdocs-material>=9.0" "mkdocstrings[python]>=0.24" .
|
|
24
|
+
|
|
25
|
+
- name: Build and deploy to GitHub Pages
|
|
26
|
+
run: mkdocs gh-deploy --force
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
name: Release
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- "v*"
|
|
7
|
+
|
|
8
|
+
permissions:
|
|
9
|
+
contents: write
|
|
10
|
+
|
|
11
|
+
jobs:
|
|
12
|
+
release:
|
|
13
|
+
runs-on: ubuntu-latest
|
|
14
|
+
steps:
|
|
15
|
+
- uses: actions/checkout@v4
|
|
16
|
+
with:
|
|
17
|
+
fetch-depth: 0
|
|
18
|
+
|
|
19
|
+
- name: Generate changelog
|
|
20
|
+
uses: orhun/git-cliff-action@v4
|
|
21
|
+
id: cliff
|
|
22
|
+
with:
|
|
23
|
+
config: cliff.toml
|
|
24
|
+
args: --latest --strip header
|
|
25
|
+
|
|
26
|
+
- name: Create GitHub release
|
|
27
|
+
uses: softprops/action-gh-release@v2
|
|
28
|
+
with:
|
|
29
|
+
body: ${{ steps.cliff.outputs.content }}
|
|
30
|
+
|
|
31
|
+
pypi:
|
|
32
|
+
needs: release
|
|
33
|
+
runs-on: ubuntu-latest
|
|
34
|
+
permissions:
|
|
35
|
+
id-token: write # OIDC token for PyPI trusted publishing (no API token)
|
|
36
|
+
steps:
|
|
37
|
+
- uses: actions/checkout@v4
|
|
38
|
+
|
|
39
|
+
- uses: actions/setup-python@v5
|
|
40
|
+
with:
|
|
41
|
+
python-version: "3.12"
|
|
42
|
+
|
|
43
|
+
- name: Build sdist and wheel
|
|
44
|
+
run: pipx run build
|
|
45
|
+
|
|
46
|
+
- name: Publish to PyPI
|
|
47
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,294 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: patchworks
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Tiled processing of arbitrarily large images with globally consistent labels
|
|
5
|
+
Project-URL: Homepage, https://github.com/imcf/patchworks
|
|
6
|
+
Project-URL: Issues, https://github.com/imcf/patchworks/issues
|
|
7
|
+
Author: IMCF Basel
|
|
8
|
+
License: MIT
|
|
9
|
+
Keywords: bioimage,chunked,dask,image processing,segmentation,tiling,zarr
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Intended Audience :: Science/Research
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Topic :: Scientific/Engineering :: Image Processing
|
|
19
|
+
Requires-Python: >=3.9
|
|
20
|
+
Requires-Dist: dask[array]>=2023.1.0
|
|
21
|
+
Requires-Dist: numpy>=1.24
|
|
22
|
+
Requires-Dist: scipy>=1.9
|
|
23
|
+
Requires-Dist: zarr>=2.14
|
|
24
|
+
Provides-Extra: all
|
|
25
|
+
Requires-Dist: nvidia-ml-py; extra == 'all'
|
|
26
|
+
Requires-Dist: psutil; extra == 'all'
|
|
27
|
+
Requires-Dist: scikit-image; extra == 'all'
|
|
28
|
+
Requires-Dist: tqdm; extra == 'all'
|
|
29
|
+
Provides-Extra: cellpose
|
|
30
|
+
Requires-Dist: cellpose>=3.0; extra == 'cellpose'
|
|
31
|
+
Provides-Extra: dev
|
|
32
|
+
Requires-Dist: psutil; extra == 'dev'
|
|
33
|
+
Requires-Dist: pytest; extra == 'dev'
|
|
34
|
+
Requires-Dist: pytest-cov; extra == 'dev'
|
|
35
|
+
Requires-Dist: scikit-image; extra == 'dev'
|
|
36
|
+
Requires-Dist: tqdm; extra == 'dev'
|
|
37
|
+
Provides-Extra: docs
|
|
38
|
+
Requires-Dist: mkdocs-material>=9.0; extra == 'docs'
|
|
39
|
+
Requires-Dist: mkdocstrings[python]>=0.24; extra == 'docs'
|
|
40
|
+
Provides-Extra: gpu
|
|
41
|
+
Requires-Dist: nvidia-ml-py; extra == 'gpu'
|
|
42
|
+
Provides-Extra: io
|
|
43
|
+
Requires-Dist: psutil; extra == 'io'
|
|
44
|
+
Requires-Dist: tqdm; extra == 'io'
|
|
45
|
+
Description-Content-Type: text/markdown
|
|
46
|
+
|
|
47
|
+
# patchworks
|
|
48
|
+
|
|
49
|
+
> Tiled processing of arbitrarily large images — any image, any function.
|
|
50
|
+
|
|
51
|
+
```
|
|
52
|
+
┌──────┬──────┬──────┐ fn(tile) → labels ┌──────┬──────┬──────┐
|
|
53
|
+
│ tile │ tile │ tile │ ─────────────────────► │ 1 │ 2 │ 3 │
|
|
54
|
+
├──────┼──────┼──────┤ ├──────┼──────┼──────┤
|
|
55
|
+
│ tile │ tile │ tile │ │ 4 │ 5 │ 6 │ globally
|
|
56
|
+
├──────┼──────┼──────┤ ├──────┼──────┼──────┤ consistent
|
|
57
|
+
│ tile │ tile │ tile │ │ 7 │ 8 │ 9 │ labels
|
|
58
|
+
└──────┴──────┴──────┘ └──────┴──────┴──────┘
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
patchworks splits a large image into tiles, runs **any callable** on each
|
|
62
|
+
tile in parallel, and merges the results into a globally consistent label array.
|
|
63
|
+
It handles terabyte-scale images without loading them into memory.
|
|
64
|
+
|
|
65
|
+
---
|
|
66
|
+
|
|
67
|
+
## Installation
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
pip install patchworks
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
Optional extras:
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
pip install "patchworks[gpu]" # GPU VRAM querying (nvidia-ml-py)
|
|
77
|
+
pip install "patchworks[cellpose]" # Cellpose plugin
|
|
78
|
+
pip install "patchworks[all]" # Everything
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
---
|
|
82
|
+
|
|
83
|
+
## Quick start — 5 lines
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
from patchworks import tile_process
|
|
87
|
+
|
|
88
|
+
def my_fn(tile):
|
|
89
|
+
from skimage.filters import threshold_otsu
|
|
90
|
+
from skimage.measure import label
|
|
91
|
+
return label(tile > threshold_otsu(tile)).astype("int32")
|
|
92
|
+
|
|
93
|
+
result = tile_process("image.zarr", my_fn, compute=True)
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
Done. `result` is a NumPy array of integer labels, same spatial shape as the
|
|
97
|
+
input, with globally unique IDs across all tiles.
|
|
98
|
+
|
|
99
|
+
---
|
|
100
|
+
|
|
101
|
+
## With Cellpose
|
|
102
|
+
|
|
103
|
+
```python
|
|
104
|
+
from patchworks import tile_process
|
|
105
|
+
from patchworks.plugins.cellpose import cellpose_fn
|
|
106
|
+
|
|
107
|
+
fn = cellpose_fn("cyto3", gpu=True, diameter=30)
|
|
108
|
+
|
|
109
|
+
tile_process(
|
|
110
|
+
"image.zarr", fn,
|
|
111
|
+
tile_shape=(1, 2048, 2048), # one z-slice per tile
|
|
112
|
+
overlap=20, # gives boundary cells enough context
|
|
113
|
+
write_to="labels.zarr", # stream directly to disk — no RAM accumulation
|
|
114
|
+
progress=True,
|
|
115
|
+
)
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
---
|
|
119
|
+
|
|
120
|
+
## With StarDist
|
|
121
|
+
|
|
122
|
+
```python
|
|
123
|
+
from stardist.models import StarDist2D
|
|
124
|
+
from patchworks import tile_process
|
|
125
|
+
|
|
126
|
+
model = StarDist2D.from_pretrained("2D_versatile_fluo")
|
|
127
|
+
|
|
128
|
+
def stardist_fn(tile):
|
|
129
|
+
img = tile[0] if tile.ndim == 3 and tile.shape[0] == 1 else tile
|
|
130
|
+
norm = img.astype("float32") / (img.max() or 1)
|
|
131
|
+
labels, _ = model.predict_instances(norm)
|
|
132
|
+
return labels.astype("int32")[None] if tile.ndim == 3 else labels.astype("int32")
|
|
133
|
+
|
|
134
|
+
tile_process("image.zarr", stardist_fn,
|
|
135
|
+
tile_shape=(1, 1024, 1024), overlap=32,
|
|
136
|
+
write_to="labels.zarr", progress=True)
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
---
|
|
140
|
+
|
|
141
|
+
## With any function
|
|
142
|
+
|
|
143
|
+
```python
|
|
144
|
+
import numpy as np
|
|
145
|
+
from scipy.ndimage import gaussian_filter
|
|
146
|
+
from skimage.measure import label
|
|
147
|
+
from patchworks import tile_process
|
|
148
|
+
|
|
149
|
+
def my_custom_fn(tile: np.ndarray) -> np.ndarray:
|
|
150
|
+
smoothed = gaussian_filter(tile.astype("float32"), sigma=1.5)
|
|
151
|
+
binary = smoothed > smoothed.mean()
|
|
152
|
+
return label(binary).astype("int32")
|
|
153
|
+
|
|
154
|
+
tile_process("image.zarr", my_custom_fn, tile_shape=(1, 512, 512))
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
---
|
|
158
|
+
|
|
159
|
+
## Common patterns
|
|
160
|
+
|
|
161
|
+
### Auto-size tiles from available memory
|
|
162
|
+
|
|
163
|
+
```python
|
|
164
|
+
from patchworks import tile_process
|
|
165
|
+
|
|
166
|
+
tile_process("image.zarr", fn, tile_shape="auto", use_gpu=True)
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
### Skip empty tiles (sparse volumes)
|
|
170
|
+
|
|
171
|
+
```python
|
|
172
|
+
from patchworks import estimate_empty_tiles, tile_process
|
|
173
|
+
|
|
174
|
+
info = estimate_empty_tiles("image.zarr", tile_shape=(120, 697, 697))
|
|
175
|
+
print(f"{info['empty_fraction']:.0%} tiles are background — will be skipped")
|
|
176
|
+
|
|
177
|
+
tile_process("image.zarr", fn,
|
|
178
|
+
tile_shape=(120, 697, 697),
|
|
179
|
+
skip_empty=True,
|
|
180
|
+
empty_threshold=info["threshold"],
|
|
181
|
+
write_to="labels.zarr")
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
### Distributed cluster for GPU
|
|
185
|
+
|
|
186
|
+
```python
|
|
187
|
+
from patchworks import make_local_cluster, tile_process
|
|
188
|
+
|
|
189
|
+
client, cluster = make_local_cluster(use_gpu=True)
|
|
190
|
+
try:
|
|
191
|
+
tile_process("image.zarr", fn, write_to="labels.zarr", progress=True)
|
|
192
|
+
finally:
|
|
193
|
+
client.close(); cluster.close()
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
### Contiguous label numbering
|
|
197
|
+
|
|
198
|
+
```python
|
|
199
|
+
# Labels are globally unique by default, but may be non-contiguous (block-encoded IDs).
|
|
200
|
+
# sequential_labels=True does a linear relabel O(voxels) — not O(n_tiles²).
|
|
201
|
+
tile_process("image.zarr", fn,
|
|
202
|
+
write_to="labels.zarr",
|
|
203
|
+
sequential_labels=True)
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
### Use only the merge step (bring your own tiling)
|
|
207
|
+
|
|
208
|
+
If you already have per-tile labels from your own pipeline, just call the
|
|
209
|
+
merge step directly:
|
|
210
|
+
|
|
211
|
+
```python
|
|
212
|
+
import dask.array as da
|
|
213
|
+
import numpy as np
|
|
214
|
+
from patchworks import merge_tile_labels
|
|
215
|
+
|
|
216
|
+
# Your own tiling + segmentation
|
|
217
|
+
image = da.from_zarr("image.zarr").rechunk((1, 1024, 1024))
|
|
218
|
+
labeled = image.map_blocks(my_segment_fn, dtype="int32",
|
|
219
|
+
meta=np.empty((0,) * image.ndim, dtype="int32"))
|
|
220
|
+
|
|
221
|
+
merged = merge_tile_labels(labeled, write_to="labels.zarr", progress=True)
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
Or merge from a zarr store your pipeline already wrote:
|
|
225
|
+
|
|
226
|
+
```python
|
|
227
|
+
from patchworks import merge_tile_labels
|
|
228
|
+
|
|
229
|
+
merged = merge_tile_labels(
|
|
230
|
+
"my_staged_labels.zarr",
|
|
231
|
+
input_component="raw_labels",
|
|
232
|
+
write_to="merged.zarr",
|
|
233
|
+
sequential_labels=True,
|
|
234
|
+
)
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
---
|
|
238
|
+
|
|
239
|
+
## How tiling and merging work
|
|
240
|
+
|
|
241
|
+
See [docs/guide/tiling.md](docs/guide/tiling.md) and [docs/guide/merging.md](docs/guide/merging.md) for a full explanation.
|
|
242
|
+
Short version:
|
|
243
|
+
|
|
244
|
+
1. Image is split into tiles (with optional overlap for boundary context).
|
|
245
|
+
2. Your function is called independently on each tile. Dask handles parallelism
|
|
246
|
+
and streaming — tiles are never all in memory at once.
|
|
247
|
+
3. Each tile's labels are written to a temp zarr exactly once (the staging
|
|
248
|
+
step — this prevents your function being called 3-4× per tile during merge).
|
|
249
|
+
4. Thin slabs at each tile boundary are scanned for touching label pairs.
|
|
250
|
+
5. scipy connected components on the pairs → relabeling lookup table.
|
|
251
|
+
6. LUT applied to every tile in parallel → globally consistent labels.
|
|
252
|
+
|
|
253
|
+
The merge is **zarr-native** (no dask task graph), so it scales to thousands of
|
|
254
|
+
tiles where the dask-image approach stalls.
|
|
255
|
+
|
|
256
|
+
---
|
|
257
|
+
|
|
258
|
+
## Known pitfalls (and how patchworks avoids them)
|
|
259
|
+
|
|
260
|
+
| Pitfall | Symptom | How patchworks handles it |
|
|
261
|
+
|---|---|---|
|
|
262
|
+
| In-process Dask client | `FutureCancelledError: lost dependencies` | Detected at startup, raises immediately with fix instructions |
|
|
263
|
+
| 3-4× fn recompute during merge | Cellpose runs 3× per tile | Staging writes labels once, merge reads from disk |
|
|
264
|
+
| O(n²) sequential relabelling | Graph construction hangs at 1000+ tiles | Linear post-pass O(voxels) via `np.unique` + LUT |
|
|
265
|
+
| Wrong overlap boundary | Output shape mismatch | Always uses `boundary="none"` |
|
|
266
|
+
| Persisting large arrays | Worker OOM | Never persists; keeps dask graph lazy and streams |
|
|
267
|
+
|
|
268
|
+
---
|
|
269
|
+
|
|
270
|
+
## Documentation
|
|
271
|
+
|
|
272
|
+
- [Quick Start](docs/getting_started.md)
|
|
273
|
+
- [API Reference](docs/api/)
|
|
274
|
+
- [How It Works](docs/guide/)
|
|
275
|
+
- [Examples](docs/examples/)
|
|
276
|
+
|
|
277
|
+
---
|
|
278
|
+
|
|
279
|
+
## Requirements
|
|
280
|
+
|
|
281
|
+
- Python ≥ 3.9
|
|
282
|
+
- dask[array], numpy, zarr, scipy
|
|
283
|
+
|
|
284
|
+
Optional:
|
|
285
|
+
- `psutil` — accurate RAM sizing for `tile_shape="auto"`
|
|
286
|
+
- `nvidia-ml-py` — accurate GPU VRAM sizing
|
|
287
|
+
- `tqdm` — progress bars
|
|
288
|
+
- `cellpose` — Cellpose plugin
|
|
289
|
+
|
|
290
|
+
---
|
|
291
|
+
|
|
292
|
+
## License
|
|
293
|
+
|
|
294
|
+
MIT
|
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
# patchworks
|
|
2
|
+
|
|
3
|
+
> Tiled processing of arbitrarily large images — any image, any function.
|
|
4
|
+
|
|
5
|
+
```
|
|
6
|
+
┌──────┬──────┬──────┐ fn(tile) → labels ┌──────┬──────┬──────┐
|
|
7
|
+
│ tile │ tile │ tile │ ─────────────────────► │ 1 │ 2 │ 3 │
|
|
8
|
+
├──────┼──────┼──────┤ ├──────┼──────┼──────┤
|
|
9
|
+
│ tile │ tile │ tile │ │ 4 │ 5 │ 6 │ globally
|
|
10
|
+
├──────┼──────┼──────┤ ├──────┼──────┼──────┤ consistent
|
|
11
|
+
│ tile │ tile │ tile │ │ 7 │ 8 │ 9 │ labels
|
|
12
|
+
└──────┴──────┴──────┘ └──────┴──────┴──────┘
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
patchworks splits a large image into tiles, runs **any callable** on each
|
|
16
|
+
tile in parallel, and merges the results into a globally consistent label array.
|
|
17
|
+
It handles terabyte-scale images without loading them into memory.
|
|
18
|
+
|
|
19
|
+
---
|
|
20
|
+
|
|
21
|
+
## Installation
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
pip install patchworks
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
Optional extras:
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
pip install "patchworks[gpu]" # GPU VRAM querying (nvidia-ml-py)
|
|
31
|
+
pip install "patchworks[cellpose]" # Cellpose plugin
|
|
32
|
+
pip install "patchworks[all]" # Everything
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
---
|
|
36
|
+
|
|
37
|
+
## Quick start — 5 lines
|
|
38
|
+
|
|
39
|
+
```python
|
|
40
|
+
from patchworks import tile_process
|
|
41
|
+
|
|
42
|
+
def my_fn(tile):
|
|
43
|
+
from skimage.filters import threshold_otsu
|
|
44
|
+
from skimage.measure import label
|
|
45
|
+
return label(tile > threshold_otsu(tile)).astype("int32")
|
|
46
|
+
|
|
47
|
+
result = tile_process("image.zarr", my_fn, compute=True)
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
Done. `result` is a NumPy array of integer labels, same spatial shape as the
|
|
51
|
+
input, with globally unique IDs across all tiles.
|
|
52
|
+
|
|
53
|
+
---
|
|
54
|
+
|
|
55
|
+
## With Cellpose
|
|
56
|
+
|
|
57
|
+
```python
|
|
58
|
+
from patchworks import tile_process
|
|
59
|
+
from patchworks.plugins.cellpose import cellpose_fn
|
|
60
|
+
|
|
61
|
+
fn = cellpose_fn("cyto3", gpu=True, diameter=30)
|
|
62
|
+
|
|
63
|
+
tile_process(
|
|
64
|
+
"image.zarr", fn,
|
|
65
|
+
tile_shape=(1, 2048, 2048), # one z-slice per tile
|
|
66
|
+
overlap=20, # gives boundary cells enough context
|
|
67
|
+
write_to="labels.zarr", # stream directly to disk — no RAM accumulation
|
|
68
|
+
progress=True,
|
|
69
|
+
)
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
---
|
|
73
|
+
|
|
74
|
+
## With StarDist
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
from stardist.models import StarDist2D
|
|
78
|
+
from patchworks import tile_process
|
|
79
|
+
|
|
80
|
+
model = StarDist2D.from_pretrained("2D_versatile_fluo")
|
|
81
|
+
|
|
82
|
+
def stardist_fn(tile):
|
|
83
|
+
img = tile[0] if tile.ndim == 3 and tile.shape[0] == 1 else tile
|
|
84
|
+
norm = img.astype("float32") / (img.max() or 1)
|
|
85
|
+
labels, _ = model.predict_instances(norm)
|
|
86
|
+
return labels.astype("int32")[None] if tile.ndim == 3 else labels.astype("int32")
|
|
87
|
+
|
|
88
|
+
tile_process("image.zarr", stardist_fn,
|
|
89
|
+
tile_shape=(1, 1024, 1024), overlap=32,
|
|
90
|
+
write_to="labels.zarr", progress=True)
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
---
|
|
94
|
+
|
|
95
|
+
## With any function
|
|
96
|
+
|
|
97
|
+
```python
|
|
98
|
+
import numpy as np
|
|
99
|
+
from scipy.ndimage import gaussian_filter
|
|
100
|
+
from skimage.measure import label
|
|
101
|
+
from patchworks import tile_process
|
|
102
|
+
|
|
103
|
+
def my_custom_fn(tile: np.ndarray) -> np.ndarray:
|
|
104
|
+
smoothed = gaussian_filter(tile.astype("float32"), sigma=1.5)
|
|
105
|
+
binary = smoothed > smoothed.mean()
|
|
106
|
+
return label(binary).astype("int32")
|
|
107
|
+
|
|
108
|
+
tile_process("image.zarr", my_custom_fn, tile_shape=(1, 512, 512))
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
---
|
|
112
|
+
|
|
113
|
+
## Common patterns
|
|
114
|
+
|
|
115
|
+
### Auto-size tiles from available memory
|
|
116
|
+
|
|
117
|
+
```python
|
|
118
|
+
from patchworks import tile_process
|
|
119
|
+
|
|
120
|
+
tile_process("image.zarr", fn, tile_shape="auto", use_gpu=True)
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
### Skip empty tiles (sparse volumes)
|
|
124
|
+
|
|
125
|
+
```python
|
|
126
|
+
from patchworks import estimate_empty_tiles, tile_process
|
|
127
|
+
|
|
128
|
+
info = estimate_empty_tiles("image.zarr", tile_shape=(120, 697, 697))
|
|
129
|
+
print(f"{info['empty_fraction']:.0%} tiles are background — will be skipped")
|
|
130
|
+
|
|
131
|
+
tile_process("image.zarr", fn,
|
|
132
|
+
tile_shape=(120, 697, 697),
|
|
133
|
+
skip_empty=True,
|
|
134
|
+
empty_threshold=info["threshold"],
|
|
135
|
+
write_to="labels.zarr")
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
### Distributed cluster for GPU
|
|
139
|
+
|
|
140
|
+
```python
|
|
141
|
+
from patchworks import make_local_cluster, tile_process
|
|
142
|
+
|
|
143
|
+
client, cluster = make_local_cluster(use_gpu=True)
|
|
144
|
+
try:
|
|
145
|
+
tile_process("image.zarr", fn, write_to="labels.zarr", progress=True)
|
|
146
|
+
finally:
|
|
147
|
+
client.close(); cluster.close()
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
### Contiguous label numbering
|
|
151
|
+
|
|
152
|
+
```python
|
|
153
|
+
# Labels are globally unique by default, but may be non-contiguous (block-encoded IDs).
|
|
154
|
+
# sequential_labels=True does a linear relabel O(voxels) — not O(n_tiles²).
|
|
155
|
+
tile_process("image.zarr", fn,
|
|
156
|
+
write_to="labels.zarr",
|
|
157
|
+
sequential_labels=True)
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
### Use only the merge step (bring your own tiling)
|
|
161
|
+
|
|
162
|
+
If you already have per-tile labels from your own pipeline, just call the
|
|
163
|
+
merge step directly:
|
|
164
|
+
|
|
165
|
+
```python
|
|
166
|
+
import dask.array as da
|
|
167
|
+
import numpy as np
|
|
168
|
+
from patchworks import merge_tile_labels
|
|
169
|
+
|
|
170
|
+
# Your own tiling + segmentation
|
|
171
|
+
image = da.from_zarr("image.zarr").rechunk((1, 1024, 1024))
|
|
172
|
+
labeled = image.map_blocks(my_segment_fn, dtype="int32",
|
|
173
|
+
meta=np.empty((0,) * image.ndim, dtype="int32"))
|
|
174
|
+
|
|
175
|
+
merged = merge_tile_labels(labeled, write_to="labels.zarr", progress=True)
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
Or merge from a zarr store your pipeline already wrote:
|
|
179
|
+
|
|
180
|
+
```python
|
|
181
|
+
from patchworks import merge_tile_labels
|
|
182
|
+
|
|
183
|
+
merged = merge_tile_labels(
|
|
184
|
+
"my_staged_labels.zarr",
|
|
185
|
+
input_component="raw_labels",
|
|
186
|
+
write_to="merged.zarr",
|
|
187
|
+
sequential_labels=True,
|
|
188
|
+
)
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
---
|
|
192
|
+
|
|
193
|
+
## How tiling and merging work
|
|
194
|
+
|
|
195
|
+
See [docs/guide/tiling.md](docs/guide/tiling.md) and [docs/guide/merging.md](docs/guide/merging.md) for a full explanation.
|
|
196
|
+
Short version:
|
|
197
|
+
|
|
198
|
+
1. Image is split into tiles (with optional overlap for boundary context).
|
|
199
|
+
2. Your function is called independently on each tile. Dask handles parallelism
|
|
200
|
+
and streaming — tiles are never all in memory at once.
|
|
201
|
+
3. Each tile's labels are written to a temp zarr exactly once (the staging
|
|
202
|
+
step — this prevents your function being called 3-4× per tile during merge).
|
|
203
|
+
4. Thin slabs at each tile boundary are scanned for touching label pairs.
|
|
204
|
+
5. scipy connected components on the pairs → relabeling lookup table.
|
|
205
|
+
6. LUT applied to every tile in parallel → globally consistent labels.
|
|
206
|
+
|
|
207
|
+
The merge is **zarr-native** (no dask task graph), so it scales to thousands of
|
|
208
|
+
tiles where the dask-image approach stalls.
|
|
209
|
+
|
|
210
|
+
---
|
|
211
|
+
|
|
212
|
+
## Known pitfalls (and how patchworks avoids them)
|
|
213
|
+
|
|
214
|
+
| Pitfall | Symptom | How patchworks handles it |
|
|
215
|
+
|---|---|---|
|
|
216
|
+
| In-process Dask client | `FutureCancelledError: lost dependencies` | Detected at startup, raises immediately with fix instructions |
|
|
217
|
+
| 3-4× fn recompute during merge | Cellpose runs 3× per tile | Staging writes labels once, merge reads from disk |
|
|
218
|
+
| O(n²) sequential relabelling | Graph construction hangs at 1000+ tiles | Linear post-pass O(voxels) via `np.unique` + LUT |
|
|
219
|
+
| Wrong overlap boundary | Output shape mismatch | Always uses `boundary="none"` |
|
|
220
|
+
| Persisting large arrays | Worker OOM | Never persists; keeps dask graph lazy and streams |
|
|
221
|
+
|
|
222
|
+
---
|
|
223
|
+
|
|
224
|
+
## Documentation
|
|
225
|
+
|
|
226
|
+
- [Quick Start](docs/getting_started.md)
|
|
227
|
+
- [API Reference](docs/api/)
|
|
228
|
+
- [How It Works](docs/guide/)
|
|
229
|
+
- [Examples](docs/examples/)
|
|
230
|
+
|
|
231
|
+
---
|
|
232
|
+
|
|
233
|
+
## Requirements
|
|
234
|
+
|
|
235
|
+
- Python ≥ 3.9
|
|
236
|
+
- dask[array], numpy, zarr, scipy
|
|
237
|
+
|
|
238
|
+
Optional:
|
|
239
|
+
- `psutil` — accurate RAM sizing for `tile_shape="auto"`
|
|
240
|
+
- `nvidia-ml-py` — accurate GPU VRAM sizing
|
|
241
|
+
- `tqdm` — progress bars
|
|
242
|
+
- `cellpose` — Cellpose plugin
|
|
243
|
+
|
|
244
|
+
---
|
|
245
|
+
|
|
246
|
+
## License
|
|
247
|
+
|
|
248
|
+
MIT
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
[changelog]
|
|
2
|
+
header = ""
|
|
3
|
+
body = """
|
|
4
|
+
{% for group, commits in commits | group_by(attribute="group") %}\
|
|
5
|
+
### {{ group }}
|
|
6
|
+
|
|
7
|
+
{% for commit in commits %}\
|
|
8
|
+
- {% if commit.scope %}**{{ commit.scope }}**: {% endif %}{{ commit.message | split(pat="\n") | first }}
|
|
9
|
+
{% endfor %}
|
|
10
|
+
{% endfor %}\
|
|
11
|
+
{% set contributors = commits | map(attribute="author.name") | unique | sort -%}
|
|
12
|
+
{% if contributors | length > 0 %}
|
|
13
|
+
### 👥 Contributors
|
|
14
|
+
|
|
15
|
+
{% for name in contributors %}\
|
|
16
|
+
- {{ name }}
|
|
17
|
+
{% endfor %}
|
|
18
|
+
{% endif %}\
|
|
19
|
+
"""
|
|
20
|
+
trim = true
|
|
21
|
+
footer = ""
|
|
22
|
+
|
|
23
|
+
[git]
|
|
24
|
+
conventional_commits = true
|
|
25
|
+
filter_unconventional = false
|
|
26
|
+
split_commits = false
|
|
27
|
+
commit_preprocessors = [
|
|
28
|
+
# Strip leading emoji from the description (after "type(scope): ")
|
|
29
|
+
{ pattern = "(\\w+(?:\\([^)]*\\))?!?:\\s+)[^\\w\\s]+\\s*", replace = "$1" },
|
|
30
|
+
]
|
|
31
|
+
commit_parsers = [
|
|
32
|
+
{ message = "^feat", group = "✨ Features" },
|
|
33
|
+
{ message = "^fix", group = "🐛 Bug Fixes" },
|
|
34
|
+
{ message = "^perf", group = "⚡ Performance" },
|
|
35
|
+
{ message = "^refactor", group = "♻️ Refactoring" },
|
|
36
|
+
{ message = "^docs", group = "📝 Documentation" },
|
|
37
|
+
{ message = "^test", group = "✅ Tests" },
|
|
38
|
+
{ message = "^build", group = "🏗️ Build" },
|
|
39
|
+
{ message = "^ci", group = "👷 CI" },
|
|
40
|
+
{ message = "^revert", group = "⏪ Reverts" },
|
|
41
|
+
{ message = "^chore\\(version\\)|^chore: bump version", skip = true },
|
|
42
|
+
{ message = "^chore", group = "🔧 Chores" },
|
|
43
|
+
{ message = "^wip", skip = true },
|
|
44
|
+
{ message = "^style", skip = true },
|
|
45
|
+
{ message = ".*", group = "🔄 Other" },
|
|
46
|
+
]
|
|
47
|
+
filter_commits = true
|
|
48
|
+
tag_pattern = "v[0-9].*"
|
|
49
|
+
sort_commits = "oldest"
|