patchworks 0.5.0__tar.gz → 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. {patchworks-0.5.0 → patchworks-0.6.0}/.github/workflows/release.yml +14 -0
  2. {patchworks-0.5.0 → patchworks-0.6.0}/PKG-INFO +44 -11
  3. {patchworks-0.5.0 → patchworks-0.6.0}/README.md +43 -10
  4. {patchworks-0.5.0 → patchworks-0.6.0}/docs/examples/custom.md +3 -3
  5. {patchworks-0.5.0 → patchworks-0.6.0}/docs/examples/custom_method.py +1 -2
  6. {patchworks-0.5.0 → patchworks-0.6.0}/docs/getting_started.md +5 -3
  7. patchworks-0.6.0/docs/guide/performance.md +60 -0
  8. {patchworks-0.5.0 → patchworks-0.6.0}/docs/index.md +1 -1
  9. {patchworks-0.5.0 → patchworks-0.6.0}/mkdocs.yml +1 -0
  10. {patchworks-0.5.0 → patchworks-0.6.0}/src/patchworks/_chunks.py +45 -0
  11. {patchworks-0.5.0 → patchworks-0.6.0}/src/patchworks/_core.py +48 -42
  12. {patchworks-0.5.0 → patchworks-0.6.0}/tests/test_core.py +24 -0
  13. {patchworks-0.5.0 → patchworks-0.6.0}/.github/workflows/docs.yml +0 -0
  14. {patchworks-0.5.0 → patchworks-0.6.0}/.github/workflows/lint.yml +0 -0
  15. {patchworks-0.5.0 → patchworks-0.6.0}/.gitignore +0 -0
  16. {patchworks-0.5.0 → patchworks-0.6.0}/cliff.toml +0 -0
  17. {patchworks-0.5.0 → patchworks-0.6.0}/docs/api/chunks.md +0 -0
  18. {patchworks-0.5.0 → patchworks-0.6.0}/docs/api/cluster.md +0 -0
  19. {patchworks-0.5.0 → patchworks-0.6.0}/docs/api/io.md +0 -0
  20. {patchworks-0.5.0 → patchworks-0.6.0}/docs/api/merge_tile_labels.md +0 -0
  21. {patchworks-0.5.0 → patchworks-0.6.0}/docs/api/plugins/cellpose.md +0 -0
  22. {patchworks-0.5.0 → patchworks-0.6.0}/docs/api/plugins/napari.md +0 -0
  23. {patchworks-0.5.0 → patchworks-0.6.0}/docs/api/plugins/ome_zarr.md +0 -0
  24. {patchworks-0.5.0 → patchworks-0.6.0}/docs/api/relabel.md +0 -0
  25. {patchworks-0.5.0 → patchworks-0.6.0}/docs/api/tile_process.md +0 -0
  26. {patchworks-0.5.0 → patchworks-0.6.0}/docs/examples/cellpose_2d.md +0 -0
  27. {patchworks-0.5.0 → patchworks-0.6.0}/docs/examples/cellpose_2d.py +0 -0
  28. {patchworks-0.5.0 → patchworks-0.6.0}/docs/examples/cellpose_3d.md +0 -0
  29. {patchworks-0.5.0 → patchworks-0.6.0}/docs/examples/cellpose_3d.py +0 -0
  30. {patchworks-0.5.0 → patchworks-0.6.0}/docs/examples/standalone_merge.md +0 -0
  31. {patchworks-0.5.0 → patchworks-0.6.0}/docs/examples/stardist.md +0 -0
  32. {patchworks-0.5.0 → patchworks-0.6.0}/docs/examples/stardist_2d.py +0 -0
  33. {patchworks-0.5.0 → patchworks-0.6.0}/docs/guide/gpu_distributed.md +0 -0
  34. {patchworks-0.5.0 → patchworks-0.6.0}/docs/guide/merging.md +0 -0
  35. {patchworks-0.5.0 → patchworks-0.6.0}/docs/guide/ome_zarr_napari.md +0 -0
  36. {patchworks-0.5.0 → patchworks-0.6.0}/docs/guide/pitfalls.md +0 -0
  37. {patchworks-0.5.0 → patchworks-0.6.0}/docs/guide/skip_empty.md +0 -0
  38. {patchworks-0.5.0 → patchworks-0.6.0}/docs/guide/tiling.md +0 -0
  39. {patchworks-0.5.0 → patchworks-0.6.0}/pyproject.toml +0 -0
  40. {patchworks-0.5.0 → patchworks-0.6.0}/src/patchworks/__init__.py +0 -0
  41. {patchworks-0.5.0 → patchworks-0.6.0}/src/patchworks/_cluster.py +0 -0
  42. {patchworks-0.5.0 → patchworks-0.6.0}/src/patchworks/_io.py +0 -0
  43. {patchworks-0.5.0 → patchworks-0.6.0}/src/patchworks/_merge.py +0 -0
  44. {patchworks-0.5.0 → patchworks-0.6.0}/src/patchworks/_relabel.py +0 -0
  45. {patchworks-0.5.0 → patchworks-0.6.0}/src/patchworks/plugins/__init__.py +0 -0
  46. {patchworks-0.5.0 → patchworks-0.6.0}/src/patchworks/plugins/cellpose.py +0 -0
  47. {patchworks-0.5.0 → patchworks-0.6.0}/src/patchworks/plugins/napari.py +0 -0
  48. {patchworks-0.5.0 → patchworks-0.6.0}/src/patchworks/plugins/ome_zarr.py +0 -0
  49. {patchworks-0.5.0 → patchworks-0.6.0}/tests/test_napari.py +0 -0
  50. {patchworks-0.5.0 → patchworks-0.6.0}/tests/test_ome_zarr.py +0 -0
@@ -48,3 +48,17 @@ jobs:
48
48
 
49
49
  - name: Publish to PyPI
50
50
  uses: pypa/gh-action-pypi-publish@release/v1
51
+
52
+ # Rebuild the org-wide pdoc apidocs site so it picks up the new version.
53
+ apidocs:
54
+ needs: release
55
+ runs-on: ubuntu-latest
56
+ steps:
57
+ - name: Trigger imcf.github.io apidocs rebuild
58
+ uses: peter-evans/repository-dispatch@v3
59
+ with:
60
+ # Fine-grained PAT with "Contents: write" on imcf/imcf.github.io,
61
+ # stored as the APIDOCS_DISPATCH_TOKEN secret in this repo.
62
+ token: ${{ secrets.APIDOCS_DISPATCH_TOKEN }}
63
+ repository: imcf/imcf.github.io
64
+ event-type: dispatch-event
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: patchworks
3
- Version: 0.5.0
3
+ Version: 0.6.0
4
4
  Summary: Tiled processing of arbitrarily large images with globally consistent labels
5
5
  Project-URL: Homepage, https://github.com/imcf/patchworks
6
6
  Project-URL: Issues, https://github.com/imcf/patchworks/issues
@@ -127,11 +127,15 @@ def my_fn(tile):
127
127
  return label(tile > threshold_otsu(tile)).astype("int32")
128
128
 
129
129
 
130
- result = tile_process("image.zarr", my_fn, compute=True)
130
+ result = tile_process("image.zarr", my_fn)
131
131
  ```
132
132
 
133
- Done. `result` is a NumPy array of integer labels, same spatial shape as the
134
- input, with globally unique IDs across all tiles.
133
+ Done. `result` is a **lazy dask array** of integer labels (call `.compute()`
134
+ for a NumPy array), same spatial shape as the input, with globally unique IDs
135
+ across all tiles. By default the labels are also written **into the input
136
+ store** at `image.zarr/labels/labels/` as a multi-scale pyramid, so the image
137
+ and its segmentation live in one OME-ZARR. Pass `write_to="labels.zarr"` to
138
+ write a separate store instead.
135
139
 
136
140
  ---
137
141
 
@@ -203,6 +207,26 @@ tile_process("image.zarr", my_custom_fn, tile_shape=(1, 512, 512))
203
207
 
204
208
  ---
205
209
 
210
+ ## Convert to OME-ZARR & view in napari
211
+
212
+ Optional plugins close the loop: convert any image (Imaris `.ims`, CZI, LIF,
213
+ ND2, OME-TIFF, … via bioio) to a pyramidal, **calibrated** OME-ZARR, then view
214
+ the image and its labels in napari.
215
+
216
+ ```python
217
+ from patchworks.plugins.ome_zarr import to_ome_zarr
218
+ from patchworks.plugins.napari import view_in_napari
219
+
220
+ to_ome_zarr("scan.ims", "scan.zarr") # lazy, OOM-safe, keeps µm calibration
221
+ view_in_napari("scan.zarr", labels="scan.zarr/labels/labels")
222
+ ```
223
+
224
+ Pyramids downsample **X/Y only** (Z kept full-res) and are built level-by-level
225
+ from disk, so terabyte volumes convert in bounded RAM. See the
226
+ [OME-ZARR & napari guide](https://imcf.one/patchworks/guide/ome_zarr_napari/).
227
+
228
+ ---
229
+
206
230
  ## Common patterns
207
231
 
208
232
  ### Auto-size tiles from available memory
@@ -288,8 +312,8 @@ merged = merge_tile_labels(
288
312
 
289
313
  ## How tiling and merging work
290
314
 
291
- See [docs/how-it-works.md](docs/how-it-works.md) for a full explanation.
292
- Short version:
315
+ See the [Merging labels guide](https://imcf.one/patchworks/guide/merging/) for
316
+ a full explanation. Short version:
293
317
 
294
318
  1. Image is split into tiles (with optional overlap for boundary context).
295
319
  2. Your function is called independently on each tile. Dask handles parallelism
@@ -319,10 +343,15 @@ tiles where the dask-image approach stalls.
319
343
 
320
344
  ## Documentation
321
345
 
322
- - [Quick Start](docs/quickstart.md)
323
- - [API Reference](docs/api-reference.md)
324
- - [How It Works](docs/how-it-works.md)
325
- - [Examples](docs/examples/)
346
+ Full docs, guides and tutorials: **<https://imcf.one/patchworks/>**
347
+
348
+ - [Getting Started](https://imcf.one/patchworks/getting_started/)
349
+ - [User Guide](https://imcf.one/patchworks/guide/tiling/) — tiling, merging,
350
+ empty-tile skipping, GPU/distributed, OME-ZARR & napari, pitfalls
351
+ - [Examples](https://imcf.one/patchworks/examples/cellpose_2d/) — Cellpose,
352
+ StarDist, custom functions, standalone merge
353
+ - [API Reference](https://imcf.one/patchworks/api/tile_process/) ·
354
+ [pdoc API](https://imcf.one/apidocs/patchworks/)
326
355
 
327
356
  ---
328
357
 
@@ -335,7 +364,11 @@ Optional:
335
364
  - `psutil` — accurate RAM sizing for `tile_shape="auto"`
336
365
  - `nvidia-ml-py` — accurate GPU VRAM sizing
337
366
  - `tqdm` — progress bars
338
- - `cellpose` — Cellpose plugin
367
+ - `cellpose` — Cellpose plugin (`patchworks[cellpose]`)
368
+ - `bioio` + readers — convert CZI/LIF/ND2/OME-TIFF/… to OME-ZARR
369
+ (`patchworks[bioio]`)
370
+ - `imaris-ims-file-reader` — convert Imaris `.ims` (`patchworks[imaris]`)
371
+ - `napari` — interactive viewer plugin (`patchworks[napari]`)
339
372
 
340
373
  ---
341
374
 
@@ -61,11 +61,15 @@ def my_fn(tile):
61
61
  return label(tile > threshold_otsu(tile)).astype("int32")
62
62
 
63
63
 
64
- result = tile_process("image.zarr", my_fn, compute=True)
64
+ result = tile_process("image.zarr", my_fn)
65
65
  ```
66
66
 
67
- Done. `result` is a NumPy array of integer labels, same spatial shape as the
68
- input, with globally unique IDs across all tiles.
67
+ Done. `result` is a **lazy dask array** of integer labels (call `.compute()`
68
+ for a NumPy array), same spatial shape as the input, with globally unique IDs
69
+ across all tiles. By default the labels are also written **into the input
70
+ store** at `image.zarr/labels/labels/` as a multi-scale pyramid, so the image
71
+ and its segmentation live in one OME-ZARR. Pass `write_to="labels.zarr"` to
72
+ write a separate store instead.
69
73
 
70
74
  ---
71
75
 
@@ -137,6 +141,26 @@ tile_process("image.zarr", my_custom_fn, tile_shape=(1, 512, 512))
137
141
 
138
142
  ---
139
143
 
144
+ ## Convert to OME-ZARR & view in napari
145
+
146
+ Optional plugins close the loop: convert any image (Imaris `.ims`, CZI, LIF,
147
+ ND2, OME-TIFF, … via bioio) to a pyramidal, **calibrated** OME-ZARR, then view
148
+ the image and its labels in napari.
149
+
150
+ ```python
151
+ from patchworks.plugins.ome_zarr import to_ome_zarr
152
+ from patchworks.plugins.napari import view_in_napari
153
+
154
+ to_ome_zarr("scan.ims", "scan.zarr") # lazy, OOM-safe, keeps µm calibration
155
+ view_in_napari("scan.zarr", labels="scan.zarr/labels/labels")
156
+ ```
157
+
158
+ Pyramids downsample **X/Y only** (Z kept full-res) and are built level-by-level
159
+ from disk, so terabyte volumes convert in bounded RAM. See the
160
+ [OME-ZARR & napari guide](https://imcf.one/patchworks/guide/ome_zarr_napari/).
161
+
162
+ ---
163
+
140
164
  ## Common patterns
141
165
 
142
166
  ### Auto-size tiles from available memory
@@ -222,8 +246,8 @@ merged = merge_tile_labels(
222
246
 
223
247
  ## How tiling and merging work
224
248
 
225
- See [docs/how-it-works.md](docs/how-it-works.md) for a full explanation.
226
- Short version:
249
+ See the [Merging labels guide](https://imcf.one/patchworks/guide/merging/) for
250
+ a full explanation. Short version:
227
251
 
228
252
  1. Image is split into tiles (with optional overlap for boundary context).
229
253
  2. Your function is called independently on each tile. Dask handles parallelism
@@ -253,10 +277,15 @@ tiles where the dask-image approach stalls.
253
277
 
254
278
  ## Documentation
255
279
 
256
- - [Quick Start](docs/quickstart.md)
257
- - [API Reference](docs/api-reference.md)
258
- - [How It Works](docs/how-it-works.md)
259
- - [Examples](docs/examples/)
280
+ Full docs, guides and tutorials: **<https://imcf.one/patchworks/>**
281
+
282
+ - [Getting Started](https://imcf.one/patchworks/getting_started/)
283
+ - [User Guide](https://imcf.one/patchworks/guide/tiling/) — tiling, merging,
284
+ empty-tile skipping, GPU/distributed, OME-ZARR & napari, pitfalls
285
+ - [Examples](https://imcf.one/patchworks/examples/cellpose_2d/) — Cellpose,
286
+ StarDist, custom functions, standalone merge
287
+ - [API Reference](https://imcf.one/patchworks/api/tile_process/) ·
288
+ [pdoc API](https://imcf.one/apidocs/patchworks/)
260
289
 
261
290
  ---
262
291
 
@@ -269,7 +298,11 @@ Optional:
269
298
  - `psutil` — accurate RAM sizing for `tile_shape="auto"`
270
299
  - `nvidia-ml-py` — accurate GPU VRAM sizing
271
300
  - `tqdm` — progress bars
272
- - `cellpose` — Cellpose plugin
301
+ - `cellpose` — Cellpose plugin (`patchworks[cellpose]`)
302
+ - `bioio` + readers — convert CZI/LIF/ND2/OME-TIFF/… to OME-ZARR
303
+ (`patchworks[bioio]`)
304
+ - `imaris-ims-file-reader` — convert Imaris `.ims` (`patchworks[imaris]`)
305
+ - `napari` — interactive viewer plugin (`patchworks[napari]`)
273
306
 
274
307
  ---
275
308
 
@@ -17,7 +17,7 @@ def threshold_fn(tile: np.ndarray) -> np.ndarray:
17
17
  return label(tile > thr).astype("int32")
18
18
 
19
19
 
20
- result = tile_process("image.zarr", threshold_fn, compute=True)
20
+ result = tile_process("image.zarr", threshold_fn)
21
21
  ```
22
22
 
23
23
  ## Gaussian + morphological operations
@@ -86,12 +86,12 @@ from patchworks import tile_process
86
86
 
87
87
  # From any array-like source
88
88
  arr = da.from_array(my_numpy_array, chunks=(1, 1024, 1024))
89
- result = tile_process(arr, my_fn, compute=True)
89
+ result = tile_process(arr, my_fn)
90
90
 
91
91
  # From tifffile
92
92
  import tifffile
93
93
  import dask.array as da
94
94
 
95
95
  arr = da.from_array(tifffile.imread("image.tif", aszarr=True))
96
- result = tile_process(arr, my_fn, compute=True)
96
+ result = tile_process(arr, my_fn)
97
97
  ```
@@ -41,8 +41,7 @@ result = tile_process(
41
41
  my_fn,
42
42
  tile_shape=(1, 512, 512),
43
43
  overlap=16,
44
- compute=True,
45
44
  progress=True,
46
45
  )
47
46
 
48
- print(f"Found {result.max()} objects")
47
+ print(f"Found {int(result.max().compute())} objects")
@@ -89,9 +89,11 @@ objects spanning tile boundaries are merged into a single label.
89
89
  ```python
90
90
  from patchworks import tile_process
91
91
 
92
- result = tile_process("image.zarr", my_fn, compute=True)
92
+ # returns a lazy dask array; labels are also written into image.zarr by
93
+ # default (image.zarr/labels/labels/, as a pyramid)
94
+ result = tile_process("image.zarr", my_fn)
93
95
  print(result.shape) # (z, y, x)
94
- print(result.max()) # number of objects found
96
+ print(int(result.max().compute())) # number of objects found
95
97
  ```
96
98
 
97
99
  === "From a dask array"
@@ -101,7 +103,7 @@ objects spanning tile boundaries are merged into a single label.
101
103
  from patchworks import tile_process
102
104
 
103
105
  arr = da.from_zarr("image.zarr")
104
- result = tile_process(arr, my_fn, compute=True)
106
+ result = tile_process(arr, my_fn)
105
107
  ```
106
108
 
107
109
  === "Stream to zarr (recommended for large images)"
@@ -0,0 +1,60 @@
1
+ # Performance & memory safety
2
+
3
+ `tile_process` is built so a run **adapts to whatever machine it lands on** and
4
+ can't run out of RAM/VRAM or freeze the box — without you tuning anything.
5
+
6
+ ## Automatic, machine-aware concurrency
7
+
8
+ The staging step (running your `fn` once per tile to a temp store) and the
9
+ merge step are sized to the host automatically:
10
+
11
+ - **GPU** (`use_gpu=True`) → **one tile at a time**, so concurrent evaluations
12
+ cannot exhaust VRAM — unless you set `max_workers`, which overrides this.
13
+ - **CPU** → as many tiles in flight as fit **80 % of available RAM** (estimated
14
+ from the tile size), and always **leaving one core free** so the machine
15
+ stays responsive — it never pins every core.
16
+
17
+ The RAM figure is read live via `psutil`; without it, a fixed 8 GiB fallback
18
+ is assumed rather than a measured value.
19
+
20
+ ## Overriding the worker count
21
+
22
+ ```python
23
+ from patchworks import tile_process
24
+
25
+ # let patchworks pick (recommended)
26
+ tile_process("scan.zarr", fn)
27
+
28
+ # or cap it yourself (staging threads + merge processes)
29
+ tile_process("scan.zarr", fn, max_workers=8)
30
+ ```
31
+
32
+ `max_workers` bounds both staging and merging. A running **distributed client**
33
+ manages its own concurrency, so the override is skipped there — configure the
34
+ cluster's memory limits instead.
35
+
36
+ ## Why it won't OOM or freeze
37
+
38
+ | Resource | Guard |
39
+ |----------|-------|
40
+ | RAM | concurrent tiles × tile size × overhead (4 buffers per tile by default) ≤ 80 % of available RAM |
41
+ | VRAM | GPU path runs one tile at a time |
42
+ | CPU | always leaves at least one core free |
43
+ | Disk I/O | each pyramid/stage level is streamed chunk-by-chunk; no whole volume in memory |
44
+
45
+ The staging graph itself is kept small — a single fused `map_overlap`
46
+ (halo → `fn` → trim) rather than three separate passes — and there is **no**
47
+ extra read-back of the staged data.
48
+
49
+ ## Getting more speed
50
+
51
+ - `tile_shape="auto"` sizes tiles to free RAM (or VRAM with `use_gpu=True`).
52
+ - `skip_empty=True` with `estimate_empty_tiles()` skips background tiles.
53
+ - A Dask **distributed** cluster (`make_local_cluster`) parallelises across
54
+ workers/GPUs; patchworks then defers concurrency to the cluster.
55
+
56
+ !!! note "What doesn't help here"
57
+ The merge and relabel steps are already vectorised NumPy + SciPy (C-level)
58
+ with no per-voxel Python loop, and the pipeline is I/O-bound — so `numba`,
59
+ `cupy`, `arrow` and `xarray` bring essentially nothing. The real levers are
60
+ tile size, concurrency (above) and zarr chunking.
@@ -53,7 +53,7 @@ def my_fn(tile):
53
53
  return label(tile > threshold_otsu(tile)).astype("int32")
54
54
 
55
55
 
56
- result = tile_process("image.zarr", my_fn, compute=True)
56
+ result = tile_process("image.zarr", my_fn)
57
57
  ```
58
58
 
59
59
  Any function. Any image.
@@ -38,6 +38,7 @@ nav:
38
38
  - Merging labels: guide/merging.md
39
39
  - Empty tile skipping: guide/skip_empty.md
40
40
  - GPU & distributed: guide/gpu_distributed.md
41
+ - Performance & memory: guide/performance.md
41
42
  - OME-ZARR & napari: guide/ome_zarr_napari.md
42
43
  - Pitfalls: guide/pitfalls.md
43
44
  - Examples:
@@ -57,6 +57,51 @@ def _get_available_memory() -> int:
57
57
  return 8 * 1024**3
58
58
 
59
59
 
60
+ def safe_worker_count(
61
+ tile_nbytes: int,
62
+ *,
63
+ use_gpu: bool = False,
64
+ fn_overhead: int = 4,
65
+ ram_fraction: float = 0.8,
66
+ ) -> int:
67
+ """Concurrent tiles that fit the machine without OOM or a CPU freeze.
68
+
69
+ Bounds the threaded scheduler by two limits and takes the smaller:
70
+
71
+ * **CPU** — leaves at least one core free so the box stays responsive
72
+ (never pins every core).
73
+ * **RAM** — at most ``ram_fraction`` of available memory, assuming each
74
+ in-flight tile needs ``fn_overhead`` copies (halo + output + temporaries).
75
+
76
+ On GPU the answer is always 1: one evaluation at a time so concurrent
77
+ tiles can never exhaust VRAM. Without ``psutil`` it returns a conservative
78
+ default rather than guessing high.
79
+
80
+ Parameters
81
+ ----------
82
+ tile_nbytes : int
83
+ Size of one tile in bytes (``prod(tile_shape) * dtype.itemsize``).
84
+ use_gpu : bool, optional
85
+ Whether tiles are processed on the GPU.
86
+ fn_overhead : int, optional
87
+ Assumed peak number of tile-sized buffers alive per worker.
88
+ ram_fraction : float, optional
89
+ Fraction of available RAM the staging step may use.
90
+
91
+ Returns
92
+ -------
93
+ int
94
+ Worker-thread count (always >= 1).
95
+ """
96
+ cpu_cap = max(1, (os.cpu_count() or 1) - 1)
97
+ if use_gpu:
98
+ return 1
99
+ avail = _get_available_memory()
100
+ per_tile = max(1, int(tile_nbytes) * max(1, fn_overhead))
101
+ mem_cap = max(1, int(avail * ram_fraction) // per_tile)
102
+ return max(1, min(cpu_cap, mem_cap))
103
+
104
+
60
105
  def _get_gpu_memory() -> int:
61
106
  """Return free GPU VRAM in bytes. Falls back to 8 GiB default."""
62
107
  try:
@@ -11,7 +11,7 @@ from typing import Any, Callable, Union
11
11
  import dask.array as da
12
12
  import numpy as np
13
13
 
14
- from ._chunks import auto_tile_shape
14
+ from ._chunks import auto_tile_shape, safe_worker_count
15
15
  from ._cluster import _client_is_in_process, _distributed_client
16
16
  from ._io import _auto_empty_threshold, load_ome_zarr
17
17
  from ._merge import zarr_native_merge
@@ -56,6 +56,7 @@ def tile_process(
56
56
  channel: int | None = 0,
57
57
  level: int = 0,
58
58
  use_gpu: bool = False,
59
+ max_workers: int | None = None,
59
60
  progress: bool = False,
60
61
  write_to: Union[str, Path, None] = None,
61
62
  output_component: str = "labels",
@@ -114,6 +115,13 @@ def tile_process(
114
115
  Pyramid level when *image* is a path (0 = full resolution).
115
116
  use_gpu:
116
117
  When ``tile_shape="auto"``, size tiles against GPU VRAM instead of RAM.
118
+ Also forces staging to one tile at a time (no VRAM contention).
119
+ max_workers:
120
+ Cap the worker threads/processes used for staging and merging. ``None``
121
+ (default) auto-sizes to the machine: bounded by available RAM (tile
122
+ size) and CPU (leaves one core free) so a run can neither OOM nor pin
123
+ every core. Ignored when a distributed client is active (it manages its
124
+ own concurrency).
117
125
  progress:
118
126
  Show a progress bar during the tile-writing and relabel steps.
119
127
  write_to:
@@ -283,11 +291,6 @@ def tile_process(
283
291
  for ax, c in enumerate(image.chunks)
284
292
  }
285
293
 
286
- if overlap > 0:
287
- # boundary="none" is required: only this boundary mode composes with
288
- # trim_overlap to recover the original shape. "reflect" keeps the halo.
289
- image = da.overlap.overlap(image, depth=_depth, boundary="none")
290
-
291
294
  # Wrap fn with optional empty-tile skipping
292
295
  _skip_thr = empty_threshold
293
296
  if skip_empty and _skip_thr is None:
@@ -303,28 +306,43 @@ def tile_process(
303
306
  logger.debug("process tile %s shape=%s", loc, block.shape)
304
307
  return fn(block)
305
308
 
306
- labeled = image.map_blocks(
307
- active_fn,
308
- dtype=np.int32,
309
- meta=np.empty((0,) * image.ndim, dtype=np.int32),
310
- )
311
-
312
- # Trim the overlap halo so staged tiles have clean boundaries for the
313
- # boundary-slab scan. Without this the scan reads halo-expanded chunks and
314
- # the merged output is larger than the input.
309
+ _meta = np.empty((0,) * image.ndim, dtype=np.int32)
315
310
  if overlap > 0:
316
- labeled = da.overlap.trim_overlap(
317
- labeled, depth=_depth, boundary="none"
311
+ # One fused pass: add the halo, run fn, trim it back off. map_overlap
312
+ # materialises only the halos it needs (no separate overlapped array)
313
+ # and keeps the task graph small. boundary="none" + trim recovers the
314
+ # original shape, so the boundary-slab scan reads clean tiles.
315
+ labeled = da.map_overlap(
316
+ active_fn,
317
+ image,
318
+ depth=_depth,
319
+ boundary="none",
320
+ trim=True,
321
+ dtype=np.int32,
322
+ meta=_meta,
318
323
  )
324
+ else:
325
+ labeled = image.map_blocks(active_fn, dtype=np.int32, meta=_meta)
319
326
 
320
- # With no distributed client the threaded scheduler runs many tiles at
321
- # once. For GPU that means several evals sharing one device → CUDA OOM.
322
- # Pin to a single worker thread so evals run serially. A distributed
323
- # client manages its own concurrency, so skip the override there.
327
+ # Bound staging concurrency to the machine so it can neither OOM nor pin
328
+ # every core:
329
+ # - GPU 1 eval at a time (no VRAM contention),
330
+ # - CPU as many tiles as fit RAM, leaving one core free.
331
+ # A distributed client manages its own concurrency, so skip the override.
324
332
  import dask as _dask
325
333
 
326
- if _active is None and use_gpu:
327
- _sched_ctx: Any = _dask.config.set(scheduler="threads", num_workers=1)
334
+ _tile_nbytes = int(np.prod(labeled.chunksize)) * labeled.dtype.itemsize
335
+ if _active is None:
336
+ _workers = (
337
+ max_workers
338
+ if max_workers is not None
339
+ else safe_worker_count(_tile_nbytes, use_gpu=use_gpu)
340
+ )
341
+ _workers = max(1, min(_workers, os.cpu_count() or 1))
342
+ logger.info("Staging with %d worker thread(s)", _workers)
343
+ _sched_ctx: Any = _dask.config.set(
344
+ scheduler="threads", num_workers=_workers
345
+ )
328
346
  else:
329
347
  _sched_ctx = _nullcontext()
330
348
 
@@ -347,24 +365,10 @@ def tile_process(
347
365
  _stage_to_zarr(labeled, stage_path, "staged", progress)
348
366
  labeled = da.from_zarr(stage_path, component="staged")
349
367
 
350
- if skip_empty and _skip_thr is not None:
351
-
352
- def _tile_max(block: np.ndarray) -> np.ndarray:
353
- return np.full((1,) * block.ndim, int(block.max()), dtype=np.int32)
354
-
355
- _tile_maxes = labeled.map_blocks(
356
- _tile_max,
357
- dtype=np.int32,
358
- chunks=tuple(tuple(1 for _ in c) for c in labeled.chunks),
359
- ).compute()
360
- _n_skip = int((_tile_maxes == 0).sum())
361
- logger.info(
362
- "skip_empty: %d/%d tiles ran fn, %d skipped (max<=%.4g)",
363
- int(_tile_maxes.size) - _n_skip,
364
- int(_tile_maxes.size),
365
- _n_skip,
366
- _skip_thr,
367
- )
368
+ # NB: no post-staging skip-count pass here — counting skipped tiles by
369
+ # re-reading the whole staged store off disk would double the I/O of the
370
+ # entire run just for a log line. Use estimate_empty_tiles() up front for
371
+ # that figure instead.
368
372
 
369
373
  def _cleanup_stage():
370
374
  if not keep_stage:
@@ -373,7 +377,9 @@ def tile_process(
373
377
  shutil.rmtree(stage_path, ignore_errors=True)
374
378
  logger.info("Removed stage store %s", stage_path)
375
379
 
376
- _nw = min(4, os.cpu_count() or 1)
380
+ # Merge runs in worker processes (each holds one chunk + an mmap'd LUT);
381
+ # size it to RAM/CPU like staging, capped so we don't spawn a process storm.
382
+ _nw = max_workers or max(1, min(safe_worker_count(_tile_nbytes), 8))
377
383
 
378
384
  # Default: input is a .zarr store and no explicit write_to → labels go back
379
385
  # *into* the input store under the NGFF labels/<name>/ group with an auto
@@ -247,3 +247,27 @@ def test_estimate_empty_tiles():
247
247
  assert info["n_tiles"] == 4
248
248
  assert info["n_occupied"] == 2
249
249
  assert info["empty_fraction"] == 0.5
250
+
251
+
252
+ def test_safe_worker_count_bounds():
253
+ import os
254
+
255
+ from patchworks._chunks import safe_worker_count
256
+
257
+ # GPU → always serial (no VRAM contention)
258
+ assert safe_worker_count(10**6, use_gpu=True) == 1
259
+ # Absurdly large tile → memory-bound to 1
260
+ assert safe_worker_count(10**15) == 1
261
+ # Tiny tile → CPU-bound, leaves a core free, always >= 1
262
+ n = safe_worker_count(1024)
263
+ assert 1 <= n <= max(1, (os.cpu_count() or 1) - 1)
264
+
265
+
266
+ def test_tile_process_max_workers():
267
+ import dask.array as da
268
+
269
+ from patchworks import tile_process
270
+
271
+ arr = da.from_array(_make_image((2, 32, 32)), chunks=(1, 32, 32))
272
+ result = tile_process(arr, _label_fn, max_workers=1).compute()
273
+ assert result.shape == (2, 32, 32)
File without changes
File without changes
File without changes
File without changes