patchworks 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. patchworks-0.2.0/.github/workflows/docs.yml +26 -0
  2. patchworks-0.2.0/.github/workflows/release.yml +47 -0
  3. patchworks-0.2.0/.gitignore +9 -0
  4. patchworks-0.2.0/PKG-INFO +294 -0
  5. patchworks-0.2.0/README.md +248 -0
  6. patchworks-0.2.0/cliff.toml +49 -0
  7. patchworks-0.2.0/docs/api/chunks.md +5 -0
  8. patchworks-0.2.0/docs/api/cluster.md +3 -0
  9. patchworks-0.2.0/docs/api/io.md +5 -0
  10. patchworks-0.2.0/docs/api/merge_tile_labels.md +3 -0
  11. patchworks-0.2.0/docs/api/plugins/cellpose.md +3 -0
  12. patchworks-0.2.0/docs/api/relabel.md +5 -0
  13. patchworks-0.2.0/docs/api/tile_process.md +3 -0
  14. patchworks-0.2.0/docs/examples/cellpose_2d.md +71 -0
  15. patchworks-0.2.0/docs/examples/cellpose_2d.py +29 -0
  16. patchworks-0.2.0/docs/examples/cellpose_3d.md +68 -0
  17. patchworks-0.2.0/docs/examples/cellpose_3d.py +47 -0
  18. patchworks-0.2.0/docs/examples/custom.md +84 -0
  19. patchworks-0.2.0/docs/examples/custom_method.py +47 -0
  20. patchworks-0.2.0/docs/examples/standalone_merge.md +68 -0
  21. patchworks-0.2.0/docs/examples/stardist.md +61 -0
  22. patchworks-0.2.0/docs/examples/stardist_2d.py +42 -0
  23. patchworks-0.2.0/docs/getting_started.md +197 -0
  24. patchworks-0.2.0/docs/guide/gpu_distributed.md +78 -0
  25. patchworks-0.2.0/docs/guide/merging.md +106 -0
  26. patchworks-0.2.0/docs/guide/pitfalls.md +123 -0
  27. patchworks-0.2.0/docs/guide/skip_empty.md +88 -0
  28. patchworks-0.2.0/docs/guide/tiling.md +89 -0
  29. patchworks-0.2.0/docs/index.md +87 -0
  30. patchworks-0.2.0/mkdocs.yml +84 -0
  31. patchworks-0.2.0/pyproject.toml +71 -0
  32. patchworks-0.2.0/src/patchworks/__init__.py +48 -0
  33. patchworks-0.2.0/src/patchworks/_chunks.py +258 -0
  34. patchworks-0.2.0/src/patchworks/_cluster.py +93 -0
  35. patchworks-0.2.0/src/patchworks/_core.py +352 -0
  36. patchworks-0.2.0/src/patchworks/_io.py +218 -0
  37. patchworks-0.2.0/src/patchworks/_merge.py +405 -0
  38. patchworks-0.2.0/src/patchworks/_relabel.py +83 -0
  39. patchworks-0.2.0/src/patchworks/plugins/__init__.py +1 -0
  40. patchworks-0.2.0/src/patchworks/plugins/cellpose.py +188 -0
  41. patchworks-0.2.0/tests/test_core.py +227 -0
@@ -0,0 +1,26 @@
1
+ name: Deploy docs
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+
8
+ permissions:
9
+ contents: write
10
+
11
+ jobs:
12
+ deploy:
13
+ runs-on: ubuntu-latest
14
+ steps:
15
+ - uses: actions/checkout@v4
16
+
17
+ - uses: actions/setup-python@v5
18
+ with:
19
+ python-version: "3.12"
20
+ cache: pip
21
+
22
+ - name: Install docs dependencies
23
+ run: pip install "mkdocs-material>=9.0" "mkdocstrings[python]>=0.24" .
24
+
25
+ - name: Build and deploy to GitHub Pages
26
+ run: mkdocs gh-deploy --force
@@ -0,0 +1,47 @@
1
+ name: Release
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - "v*"
7
+
8
+ permissions:
9
+ contents: write
10
+
11
+ jobs:
12
+ release:
13
+ runs-on: ubuntu-latest
14
+ steps:
15
+ - uses: actions/checkout@v4
16
+ with:
17
+ fetch-depth: 0
18
+
19
+ - name: Generate changelog
20
+ uses: orhun/git-cliff-action@v4
21
+ id: cliff
22
+ with:
23
+ config: cliff.toml
24
+ args: --latest --strip header
25
+
26
+ - name: Create GitHub release
27
+ uses: softprops/action-gh-release@v2
28
+ with:
29
+ body: ${{ steps.cliff.outputs.content }}
30
+
31
+ pypi:
32
+ needs: release
33
+ runs-on: ubuntu-latest
34
+ permissions:
35
+ id-token: write # OIDC token for PyPI trusted publishing (no API token)
36
+ steps:
37
+ - uses: actions/checkout@v4
38
+
39
+ - uses: actions/setup-python@v5
40
+ with:
41
+ python-version: "3.12"
42
+
43
+ - name: Build sdist and wheel
44
+ run: pipx run build
45
+
46
+ - name: Publish to PyPI
47
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,9 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *.egg-info/
4
+ dist/
5
+ build/
6
+ .eggs/
7
+ site/
8
+ .pytest_cache/
9
+ *.log
@@ -0,0 +1,294 @@
1
+ Metadata-Version: 2.4
2
+ Name: patchworks
3
+ Version: 0.2.0
4
+ Summary: Tiled processing of arbitrarily large images with globally consistent labels
5
+ Project-URL: Homepage, https://github.com/imcf/patchworks
6
+ Project-URL: Issues, https://github.com/imcf/patchworks/issues
7
+ Author: IMCF Basel
8
+ License: MIT
9
+ Keywords: bioimage,chunked,dask,image processing,segmentation,tiling,zarr
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Intended Audience :: Science/Research
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.9
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Scientific/Engineering :: Image Processing
19
+ Requires-Python: >=3.9
20
+ Requires-Dist: dask[array]>=2023.1.0
21
+ Requires-Dist: numpy>=1.24
22
+ Requires-Dist: scipy>=1.9
23
+ Requires-Dist: zarr>=2.14
24
+ Provides-Extra: all
25
+ Requires-Dist: nvidia-ml-py; extra == 'all'
26
+ Requires-Dist: psutil; extra == 'all'
27
+ Requires-Dist: scikit-image; extra == 'all'
28
+ Requires-Dist: tqdm; extra == 'all'
29
+ Provides-Extra: cellpose
30
+ Requires-Dist: cellpose>=3.0; extra == 'cellpose'
31
+ Provides-Extra: dev
32
+ Requires-Dist: psutil; extra == 'dev'
33
+ Requires-Dist: pytest; extra == 'dev'
34
+ Requires-Dist: pytest-cov; extra == 'dev'
35
+ Requires-Dist: scikit-image; extra == 'dev'
36
+ Requires-Dist: tqdm; extra == 'dev'
37
+ Provides-Extra: docs
38
+ Requires-Dist: mkdocs-material>=9.0; extra == 'docs'
39
+ Requires-Dist: mkdocstrings[python]>=0.24; extra == 'docs'
40
+ Provides-Extra: gpu
41
+ Requires-Dist: nvidia-ml-py; extra == 'gpu'
42
+ Provides-Extra: io
43
+ Requires-Dist: psutil; extra == 'io'
44
+ Requires-Dist: tqdm; extra == 'io'
45
+ Description-Content-Type: text/markdown
46
+
47
+ # patchworks
48
+
49
+ > Tiled processing of arbitrarily large images — any image, any function.
50
+
51
+ ```
52
+ ┌──────┬──────┬──────┐ fn(tile) → labels ┌──────┬──────┬──────┐
53
+ │ tile │ tile │ tile │ ─────────────────────► │ 1 │ 2 │ 3 │
54
+ ├──────┼──────┼──────┤ ├──────┼──────┼──────┤
55
+ │ tile │ tile │ tile │ │ 4 │ 5 │ 6 │ globally
56
+ ├──────┼──────┼──────┤ ├──────┼──────┼──────┤ consistent
57
+ │ tile │ tile │ tile │ │ 7 │ 8 │ 9 │ labels
58
+ └──────┴──────┴──────┘ └──────┴──────┴──────┘
59
+ ```
60
+
61
+ patchworks splits a large image into tiles, runs **any callable** on each
62
+ tile in parallel, and merges the results into a globally consistent label array.
63
+ It handles terabyte-scale images without loading them into memory.
64
+
65
+ ---
66
+
67
+ ## Installation
68
+
69
+ ```bash
70
+ pip install patchworks
71
+ ```
72
+
73
+ Optional extras:
74
+
75
+ ```bash
76
+ pip install "patchworks[gpu]" # GPU VRAM querying (nvidia-ml-py)
77
+ pip install "patchworks[cellpose]" # Cellpose plugin
78
+ pip install "patchworks[all]" # Everything
79
+ ```
80
+
81
+ ---
82
+
83
+ ## Quick start — 5 lines
84
+
85
+ ```python
86
+ from patchworks import tile_process
87
+
88
+ def my_fn(tile):
89
+ from skimage.filters import threshold_otsu
90
+ from skimage.measure import label
91
+ return label(tile > threshold_otsu(tile)).astype("int32")
92
+
93
+ result = tile_process("image.zarr", my_fn, compute=True)
94
+ ```
95
+
96
+ Done. `result` is a NumPy array of integer labels, same spatial shape as the
97
+ input, with globally unique IDs across all tiles.
98
+
99
+ ---
100
+
101
+ ## With Cellpose
102
+
103
+ ```python
104
+ from patchworks import tile_process
105
+ from patchworks.plugins.cellpose import cellpose_fn
106
+
107
+ fn = cellpose_fn("cyto3", gpu=True, diameter=30)
108
+
109
+ tile_process(
110
+ "image.zarr", fn,
111
+ tile_shape=(1, 2048, 2048), # one z-slice per tile
112
+ overlap=20, # gives boundary cells enough context
113
+ write_to="labels.zarr", # stream directly to disk — no RAM accumulation
114
+ progress=True,
115
+ )
116
+ ```
117
+
118
+ ---
119
+
120
+ ## With StarDist
121
+
122
+ ```python
123
+ from stardist.models import StarDist2D
124
+ from patchworks import tile_process
125
+
126
+ model = StarDist2D.from_pretrained("2D_versatile_fluo")
127
+
128
+ def stardist_fn(tile):
129
+ img = tile[0] if tile.ndim == 3 and tile.shape[0] == 1 else tile
130
+ norm = img.astype("float32") / (img.max() or 1)
131
+ labels, _ = model.predict_instances(norm)
132
+ return labels.astype("int32")[None] if tile.ndim == 3 else labels.astype("int32")
133
+
134
+ tile_process("image.zarr", stardist_fn,
135
+ tile_shape=(1, 1024, 1024), overlap=32,
136
+ write_to="labels.zarr", progress=True)
137
+ ```
138
+
139
+ ---
140
+
141
+ ## With any function
142
+
143
+ ```python
144
+ import numpy as np
145
+ from scipy.ndimage import gaussian_filter
146
+ from skimage.measure import label
147
+ from patchworks import tile_process
148
+
149
+ def my_custom_fn(tile: np.ndarray) -> np.ndarray:
150
+ smoothed = gaussian_filter(tile.astype("float32"), sigma=1.5)
151
+ binary = smoothed > smoothed.mean()
152
+ return label(binary).astype("int32")
153
+
154
+ tile_process("image.zarr", my_custom_fn, tile_shape=(1, 512, 512))
155
+ ```
156
+
157
+ ---
158
+
159
+ ## Common patterns
160
+
161
+ ### Auto-size tiles from available memory
162
+
163
+ ```python
164
+ from patchworks import tile_process
165
+
166
+ tile_process("image.zarr", fn, tile_shape="auto", use_gpu=True)
167
+ ```
168
+
169
+ ### Skip empty tiles (sparse volumes)
170
+
171
+ ```python
172
+ from patchworks import estimate_empty_tiles, tile_process
173
+
174
+ info = estimate_empty_tiles("image.zarr", tile_shape=(120, 697, 697))
175
+ print(f"{info['empty_fraction']:.0%} tiles are background — will be skipped")
176
+
177
+ tile_process("image.zarr", fn,
178
+ tile_shape=(120, 697, 697),
179
+ skip_empty=True,
180
+ empty_threshold=info["threshold"],
181
+ write_to="labels.zarr")
182
+ ```
183
+
184
+ ### Distributed cluster for GPU
185
+
186
+ ```python
187
+ from patchworks import make_local_cluster, tile_process
188
+
189
+ client, cluster = make_local_cluster(use_gpu=True)
190
+ try:
191
+ tile_process("image.zarr", fn, write_to="labels.zarr", progress=True)
192
+ finally:
193
+ client.close(); cluster.close()
194
+ ```
195
+
196
+ ### Contiguous label numbering
197
+
198
+ ```python
199
+ # Labels are globally unique by default, but may be gappy (block-encoded IDs).
200
+ # sequential_labels=True does a linear relabel O(voxels) — not O(n_tiles²).
201
+ tile_process("image.zarr", fn,
202
+ write_to="labels.zarr",
203
+ sequential_labels=True)
204
+ ```
205
+
206
+ ### Use only the merge step (bring your own tiling)
207
+
208
+ If you already have per-tile labels from your own pipeline, just call the
209
+ merge step directly:
210
+
211
+ ```python
212
+ import dask.array as da
213
+ import numpy as np
214
+ from patchworks import merge_tile_labels
215
+
216
+ # Your own tiling + segmentation
217
+ image = da.from_zarr("image.zarr").rechunk((1, 1024, 1024))
218
+ labeled = image.map_blocks(my_segment_fn, dtype="int32",
219
+ meta=np.empty((0,) * image.ndim, dtype="int32"))
220
+
221
+ merged = merge_tile_labels(labeled, write_to="labels.zarr", progress=True)
222
+ ```
223
+
224
+ Or merge from a zarr store your pipeline already wrote:
225
+
226
+ ```python
227
+ from patchworks import merge_tile_labels
228
+
229
+ merged = merge_tile_labels(
230
+ "my_staged_labels.zarr",
231
+ input_component="raw_labels",
232
+ write_to="merged.zarr",
233
+ sequential_labels=True,
234
+ )
235
+ ```
236
+
237
+ ---
238
+
239
+ ## How tiling and merging work
240
+
241
+ See [docs/guide/tiling.md](docs/guide/tiling.md) and [docs/guide/merging.md](docs/guide/merging.md) for a full explanation.
242
+ Short version:
243
+
244
+ 1. Image is split into tiles (with optional overlap for boundary context).
245
+ 2. Your function is called independently on each tile. Dask handles parallelism
246
+ and streaming — tiles are never all in memory at once.
247
+ 3. Each tile's labels are written to a temp zarr exactly once (the staging
248
+ step — this prevents your function being called 3-4× per tile during merge).
249
+ 4. Thin slabs at each tile boundary are scanned for touching label pairs.
250
+ 5. scipy connected components on the pairs → relabeling lookup table.
251
+ 6. LUT applied to every tile in parallel → globally consistent labels.
252
+
253
+ The merge is **zarr-native** (no dask task graph), so it scales to thousands of
254
+ tiles where the dask-image approach stalls.
255
+
256
+ ---
257
+
258
+ ## Known pitfalls (and how patchworks avoids them)
259
+
260
+ | Pitfall | Symptom | How patchworks handles it |
261
+ |---|---|---|
262
+ | In-process Dask client | `FutureCancelledError: lost dependencies` | Detected at startup, raises immediately with fix instructions |
263
+ | 3-4× fn recompute during merge | Cellpose runs 3× per tile | Staging writes labels once, merge reads from disk |
264
+ | O(n²) sequential relabelling | Graph construction hangs at 1000+ tiles | Linear post-pass O(voxels) via `np.unique` + LUT |
265
+ | Wrong overlap boundary | Output shape mismatch | Always uses `boundary="none"` |
266
+ | Persisting large arrays | Worker OOM | Never persists; keeps dask graph lazy and streams |
267
+
268
+ ---
269
+
270
+ ## Documentation
271
+
272
+ - [Quick Start](docs/getting_started.md)
273
+ - [API Reference](docs/api/)
274
+ - [How It Works](docs/guide/merging.md)
275
+ - [Examples](docs/examples/)
276
+
277
+ ---
278
+
279
+ ## Requirements
280
+
281
+ - Python ≥ 3.9
282
+ - dask[array], numpy, zarr, scipy
283
+
284
+ Optional:
285
+ - `psutil` — accurate RAM sizing for `tile_shape="auto"`
286
+ - `nvidia-ml-py` — accurate GPU VRAM sizing
287
+ - `tqdm` — progress bars
288
+ - `cellpose` — Cellpose plugin
289
+
290
+ ---
291
+
292
+ ## License
293
+
294
+ MIT
@@ -0,0 +1,248 @@
1
+ # patchworks
2
+
3
+ > Tiled processing of arbitrarily large images — any image, any function.
4
+
5
+ ```
6
+ ┌──────┬──────┬──────┐ fn(tile) → labels ┌──────┬──────┬──────┐
7
+ │ tile │ tile │ tile │ ─────────────────────► │ 1 │ 2 │ 3 │
8
+ ├──────┼──────┼──────┤ ├──────┼──────┼──────┤
9
+ │ tile │ tile │ tile │ │ 4 │ 5 │ 6 │ globally
10
+ ├──────┼──────┼──────┤ ├──────┼──────┼──────┤ consistent
11
+ │ tile │ tile │ tile │ │ 7 │ 8 │ 9 │ labels
12
+ └──────┴──────┴──────┘ └──────┴──────┴──────┘
13
+ ```
14
+
15
+ patchworks splits a large image into tiles, runs **any callable** on each
16
+ tile in parallel, and merges the results into a globally consistent label array.
17
+ It handles terabyte-scale images without loading them into memory.
18
+
19
+ ---
20
+
21
+ ## Installation
22
+
23
+ ```bash
24
+ pip install patchworks
25
+ ```
26
+
27
+ Optional extras:
28
+
29
+ ```bash
30
+ pip install "patchworks[gpu]" # GPU VRAM querying (nvidia-ml-py)
31
+ pip install "patchworks[cellpose]" # Cellpose plugin
32
+ pip install "patchworks[all]" # Everything
33
+ ```
34
+
35
+ ---
36
+
37
+ ## Quick start — 5 lines
38
+
39
+ ```python
40
+ from patchworks import tile_process
41
+
42
+ def my_fn(tile):
43
+ from skimage.filters import threshold_otsu
44
+ from skimage.measure import label
45
+ return label(tile > threshold_otsu(tile)).astype("int32")
46
+
47
+ result = tile_process("image.zarr", my_fn, compute=True)
48
+ ```
49
+
50
+ Done. `result` is a NumPy array of integer labels, same spatial shape as the
51
+ input, with globally unique IDs across all tiles.
52
+
53
+ ---
54
+
55
+ ## With Cellpose
56
+
57
+ ```python
58
+ from patchworks import tile_process
59
+ from patchworks.plugins.cellpose import cellpose_fn
60
+
61
+ fn = cellpose_fn("cyto3", gpu=True, diameter=30)
62
+
63
+ tile_process(
64
+ "image.zarr", fn,
65
+ tile_shape=(1, 2048, 2048), # one z-slice per tile
66
+ overlap=20, # gives boundary cells enough context
67
+ write_to="labels.zarr", # stream directly to disk — no RAM accumulation
68
+ progress=True,
69
+ )
70
+ ```
71
+
72
+ ---
73
+
74
+ ## With StarDist
75
+
76
+ ```python
77
+ from stardist.models import StarDist2D
78
+ from patchworks import tile_process
79
+
80
+ model = StarDist2D.from_pretrained("2D_versatile_fluo")
81
+
82
+ def stardist_fn(tile):
83
+ img = tile[0] if tile.ndim == 3 and tile.shape[0] == 1 else tile
84
+ norm = img.astype("float32") / (img.max() or 1)
85
+ labels, _ = model.predict_instances(norm)
86
+ return labels.astype("int32")[None] if tile.ndim == 3 else labels.astype("int32")
87
+
88
+ tile_process("image.zarr", stardist_fn,
89
+ tile_shape=(1, 1024, 1024), overlap=32,
90
+ write_to="labels.zarr", progress=True)
91
+ ```
92
+
93
+ ---
94
+
95
+ ## With any function
96
+
97
+ ```python
98
+ import numpy as np
99
+ from scipy.ndimage import gaussian_filter
100
+ from skimage.measure import label
101
+ from patchworks import tile_process
102
+
103
+ def my_custom_fn(tile: np.ndarray) -> np.ndarray:
104
+ smoothed = gaussian_filter(tile.astype("float32"), sigma=1.5)
105
+ binary = smoothed > smoothed.mean()
106
+ return label(binary).astype("int32")
107
+
108
+ tile_process("image.zarr", my_custom_fn, tile_shape=(1, 512, 512))
109
+ ```
110
+
111
+ ---
112
+
113
+ ## Common patterns
114
+
115
+ ### Auto-size tiles from available memory
116
+
117
+ ```python
118
+ from patchworks import tile_process
119
+
120
+ tile_process("image.zarr", fn, tile_shape="auto", use_gpu=True)
121
+ ```
122
+
123
+ ### Skip empty tiles (sparse volumes)
124
+
125
+ ```python
126
+ from patchworks import estimate_empty_tiles, tile_process
127
+
128
+ info = estimate_empty_tiles("image.zarr", tile_shape=(120, 697, 697))
129
+ print(f"{info['empty_fraction']:.0%} tiles are background — will be skipped")
130
+
131
+ tile_process("image.zarr", fn,
132
+ tile_shape=(120, 697, 697),
133
+ skip_empty=True,
134
+ empty_threshold=info["threshold"],
135
+ write_to="labels.zarr")
136
+ ```
137
+
138
+ ### Distributed cluster for GPU
139
+
140
+ ```python
141
+ from patchworks import make_local_cluster, tile_process
142
+
143
+ client, cluster = make_local_cluster(use_gpu=True)
144
+ try:
145
+ tile_process("image.zarr", fn, write_to="labels.zarr", progress=True)
146
+ finally:
147
+ client.close(); cluster.close()
148
+ ```
149
+
150
+ ### Contiguous label numbering
151
+
152
+ ```python
153
+ # Labels are globally unique by default, but may be gappy (block-encoded IDs).
154
+ # sequential_labels=True does a linear relabel O(voxels) — not O(n_tiles²).
155
+ tile_process("image.zarr", fn,
156
+ write_to="labels.zarr",
157
+ sequential_labels=True)
158
+ ```
159
+
160
+ ### Use only the merge step (bring your own tiling)
161
+
162
+ If you already have per-tile labels from your own pipeline, just call the
163
+ merge step directly:
164
+
165
+ ```python
166
+ import dask.array as da
167
+ import numpy as np
168
+ from patchworks import merge_tile_labels
169
+
170
+ # Your own tiling + segmentation
171
+ image = da.from_zarr("image.zarr").rechunk((1, 1024, 1024))
172
+ labeled = image.map_blocks(my_segment_fn, dtype="int32",
173
+ meta=np.empty((0,) * image.ndim, dtype="int32"))
174
+
175
+ merged = merge_tile_labels(labeled, write_to="labels.zarr", progress=True)
176
+ ```
177
+
178
+ Or merge from a zarr store your pipeline already wrote:
179
+
180
+ ```python
181
+ from patchworks import merge_tile_labels
182
+
183
+ merged = merge_tile_labels(
184
+ "my_staged_labels.zarr",
185
+ input_component="raw_labels",
186
+ write_to="merged.zarr",
187
+ sequential_labels=True,
188
+ )
189
+ ```
190
+
191
+ ---
192
+
193
+ ## How tiling and merging work
194
+
195
+ See [docs/guide/tiling.md](docs/guide/tiling.md) and [docs/guide/merging.md](docs/guide/merging.md) for a full explanation.
196
+ Short version:
197
+
198
+ 1. Image is split into tiles (with optional overlap for boundary context).
199
+ 2. Your function is called independently on each tile. Dask handles parallelism
200
+ and streaming — tiles are never all in memory at once.
201
+ 3. Each tile's labels are written to a temp zarr exactly once (the staging
202
+ step — this prevents your function being called 3-4× per tile during merge).
203
+ 4. Thin slabs at each tile boundary are scanned for touching label pairs.
204
+ 5. scipy connected components on the pairs → relabeling lookup table.
205
+ 6. LUT applied to every tile in parallel → globally consistent labels.
206
+
207
+ The merge is **zarr-native** (no dask task graph), so it scales to thousands of
208
+ tiles where the dask-image approach stalls.
209
+
210
+ ---
211
+
212
+ ## Known pitfalls (and how patchworks avoids them)
213
+
214
+ | Pitfall | Symptom | How patchworks handles it |
215
+ |---|---|---|
216
+ | In-process Dask client | `FutureCancelledError: lost dependencies` | Detected at startup, raises immediately with fix instructions |
217
+ | 3-4× fn recompute during merge | Cellpose runs 3× per tile | Staging writes labels once, merge reads from disk |
218
+ | O(n²) sequential relabelling | Graph construction hangs at 1000+ tiles | Linear post-pass O(voxels) via `np.unique` + LUT |
219
+ | Wrong overlap boundary | Output shape mismatch | Always uses `boundary="none"` |
220
+ | Persisting large arrays | Worker OOM | Never persists; keeps dask graph lazy and streams |
221
+
222
+ ---
223
+
224
+ ## Documentation
225
+
226
+ - [Quick Start](docs/getting_started.md)
227
+ - [API Reference](docs/api/)
228
+ - [How It Works](docs/guide/merging.md)
229
+ - [Examples](docs/examples/)
230
+
231
+ ---
232
+
233
+ ## Requirements
234
+
235
+ - Python ≥ 3.9
236
+ - dask[array], numpy, zarr, scipy
237
+
238
+ Optional:
239
+ - `psutil` — accurate RAM sizing for `tile_shape="auto"`
240
+ - `nvidia-ml-py` — accurate GPU VRAM sizing
241
+ - `tqdm` — progress bars
242
+ - `cellpose` — Cellpose plugin
243
+
244
+ ---
245
+
246
+ ## License
247
+
248
+ MIT
@@ -0,0 +1,49 @@
1
+ [changelog]
2
+ header = ""
3
+ body = """
4
+ {% for group, commits in commits | group_by(attribute="group") %}\
5
+ ### {{ group }}
6
+
7
+ {% for commit in commits %}\
8
+ - {% if commit.scope %}**{{ commit.scope }}**: {% endif %}{{ commit.message | split(pat="\n") | first }}
9
+ {% endfor %}
10
+ {% endfor %}\
11
+ {% set contributors = commits | map(attribute="author.name") | unique | sort -%}
12
+ {% if contributors | length > 0 %}
13
+ ### 👥 Contributors
14
+
15
+ {% for name in contributors %}\
16
+ - {{ name }}
17
+ {% endfor %}
18
+ {% endif %}\
19
+ """
20
+ trim = true
21
+ footer = ""
22
+
23
+ [git]
24
+ conventional_commits = true
25
+ filter_unconventional = false
26
+ split_commits = false
27
+ commit_preprocessors = [
28
+ # Strip leading emoji from the description (after "type(scope): ")
29
+ { pattern = "(\\w+(?:\\([^)]*\\))?!?:\\s+)[^\\w\\s]+\\s*", replace = "$1" },
30
+ ]
31
+ commit_parsers = [
32
+ { message = "^feat", group = "✨ Features" },
33
+ { message = "^fix", group = "🐛 Bug Fixes" },
34
+ { message = "^perf", group = "⚡ Performance" },
35
+ { message = "^refactor", group = "♻️ Refactoring" },
36
+ { message = "^docs", group = "📝 Documentation" },
37
+ { message = "^test", group = "✅ Tests" },
38
+ { message = "^build", group = "🏗️ Build" },
39
+ { message = "^ci", group = "👷 CI" },
40
+ { message = "^revert", group = "⏪ Reverts" },
41
+ { message = "^chore\\(version\\)|^chore: bump version", skip = true },
42
+ { message = "^chore", group = "🔧 Chores" },
43
+ { message = "^wip", skip = true },
44
+ { message = "^style", skip = true },
45
+ { message = ".*", group = "🔄 Other" },
46
+ ]
47
+ filter_commits = true
48
+ tag_pattern = "v[0-9].*"
49
+ sort_commits = "oldest"
@@ -0,0 +1,5 @@
1
+ # Tile sizing
2
+
3
+ ::: patchworks.auto_tile_shape
4
+
5
+ ::: patchworks.auto_tile_shape_cellpose
@@ -0,0 +1,3 @@
1
+ # Cluster helpers
2
+
3
+ ::: patchworks.make_local_cluster
@@ -0,0 +1,5 @@
1
+ # I/O helpers
2
+
3
+ ::: patchworks.load_ome_zarr
4
+
5
+ ::: patchworks.estimate_empty_tiles
@@ -0,0 +1,3 @@
1
+ # merge_tile_labels
2
+
3
+ ::: patchworks.merge_tile_labels
@@ -0,0 +1,3 @@
1
+ # Cellpose plugin
2
+
3
+ ::: patchworks.plugins.cellpose.cellpose_fn