patchworks 0.8.1__tar.gz → 0.9.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. {patchworks-0.8.1 → patchworks-0.9.0}/.github/workflows/lint.yml +10 -0
  2. patchworks-0.9.0/.markdownlint-cli2.yaml +16 -0
  3. {patchworks-0.8.1 → patchworks-0.9.0}/PKG-INFO +3 -2
  4. {patchworks-0.8.1 → patchworks-0.9.0}/README.md +2 -1
  5. {patchworks-0.8.1 → patchworks-0.9.0}/docs/examples/stardist.md +10 -10
  6. {patchworks-0.8.1 → patchworks-0.9.0}/docs/getting_started.md +36 -36
  7. {patchworks-0.8.1 → patchworks-0.9.0}/docs/guide/gpu_distributed.md +1 -1
  8. {patchworks-0.8.1 → patchworks-0.9.0}/docs/guide/merging.md +2 -2
  9. {patchworks-0.8.1 → patchworks-0.9.0}/docs/guide/ome_zarr_napari.md +22 -0
  10. {patchworks-0.8.1 → patchworks-0.9.0}/docs/guide/pitfalls.md +1 -1
  11. {patchworks-0.8.1 → patchworks-0.9.0}/docs/guide/skip_empty.md +1 -1
  12. {patchworks-0.8.1 → patchworks-0.9.0}/docs/guide/tiling.md +3 -2
  13. {patchworks-0.8.1 → patchworks-0.9.0}/docs/index.md +1 -1
  14. {patchworks-0.8.1 → patchworks-0.9.0}/src/patchworks/_chunks.py +16 -1
  15. {patchworks-0.8.1 → patchworks-0.9.0}/src/patchworks/_cluster.py +19 -2
  16. {patchworks-0.8.1 → patchworks-0.9.0}/src/patchworks/_core.py +41 -3
  17. {patchworks-0.8.1 → patchworks-0.9.0}/src/patchworks/_io.py +26 -1
  18. {patchworks-0.8.1 → patchworks-0.9.0}/src/patchworks/_merge.py +110 -1
  19. {patchworks-0.8.1 → patchworks-0.9.0}/src/patchworks/_relabel.py +10 -0
  20. {patchworks-0.8.1 → patchworks-0.9.0}/src/patchworks/plugins/cellpose.py +56 -2
  21. {patchworks-0.8.1 → patchworks-0.9.0}/src/patchworks/plugins/napari.py +85 -4
  22. {patchworks-0.8.1 → patchworks-0.9.0}/src/patchworks/plugins/ome_zarr.py +475 -21
  23. {patchworks-0.8.1 → patchworks-0.9.0}/tests/test_ome_zarr.py +39 -0
  24. {patchworks-0.8.1 → patchworks-0.9.0}/.github/workflows/docs.yml +0 -0
  25. {patchworks-0.8.1 → patchworks-0.9.0}/.github/workflows/release.yml +0 -0
  26. {patchworks-0.8.1 → patchworks-0.9.0}/.gitignore +0 -0
  27. {patchworks-0.8.1 → patchworks-0.9.0}/cliff.toml +0 -0
  28. {patchworks-0.8.1 → patchworks-0.9.0}/docs/api/chunks.md +0 -0
  29. {patchworks-0.8.1 → patchworks-0.9.0}/docs/api/cluster.md +0 -0
  30. {patchworks-0.8.1 → patchworks-0.9.0}/docs/api/io.md +0 -0
  31. {patchworks-0.8.1 → patchworks-0.9.0}/docs/api/merge_tile_labels.md +0 -0
  32. {patchworks-0.8.1 → patchworks-0.9.0}/docs/api/plugins/cellpose.md +0 -0
  33. {patchworks-0.8.1 → patchworks-0.9.0}/docs/api/plugins/napari.md +0 -0
  34. {patchworks-0.8.1 → patchworks-0.9.0}/docs/api/plugins/ome_zarr.md +0 -0
  35. {patchworks-0.8.1 → patchworks-0.9.0}/docs/api/relabel.md +0 -0
  36. {patchworks-0.8.1 → patchworks-0.9.0}/docs/api/tile_process.md +0 -0
  37. {patchworks-0.8.1 → patchworks-0.9.0}/docs/examples/cellpose_2d.md +0 -0
  38. {patchworks-0.8.1 → patchworks-0.9.0}/docs/examples/cellpose_2d.py +0 -0
  39. {patchworks-0.8.1 → patchworks-0.9.0}/docs/examples/cellpose_3d.md +0 -0
  40. {patchworks-0.8.1 → patchworks-0.9.0}/docs/examples/cellpose_3d.py +0 -0
  41. {patchworks-0.8.1 → patchworks-0.9.0}/docs/examples/custom.md +0 -0
  42. {patchworks-0.8.1 → patchworks-0.9.0}/docs/examples/custom_method.py +0 -0
  43. {patchworks-0.8.1 → patchworks-0.9.0}/docs/examples/standalone_merge.md +0 -0
  44. {patchworks-0.8.1 → patchworks-0.9.0}/docs/examples/stardist_2d.py +0 -0
  45. {patchworks-0.8.1 → patchworks-0.9.0}/docs/guide/performance.md +0 -0
  46. {patchworks-0.8.1 → patchworks-0.9.0}/mkdocs.yml +0 -0
  47. {patchworks-0.8.1 → patchworks-0.9.0}/pyproject.toml +0 -0
  48. {patchworks-0.8.1 → patchworks-0.9.0}/src/patchworks/__init__.py +0 -0
  49. {patchworks-0.8.1 → patchworks-0.9.0}/src/patchworks/plugins/__init__.py +0 -0
  50. {patchworks-0.8.1 → patchworks-0.9.0}/tests/test_core.py +0 -0
  51. {patchworks-0.8.1 → patchworks-0.9.0}/tests/test_napari.py +0 -0
@@ -23,3 +23,13 @@ jobs:
23
23
  with:
24
24
  version: "0.15.18"
25
25
  args: "format --check"
26
+
27
+ markdownlint:
28
+ runs-on: ubuntu-latest
29
+ steps:
30
+ - uses: actions/checkout@v4
31
+
32
+ - name: markdownlint
33
+ uses: DavidAnson/markdownlint-cli2-action@v20
34
+ with:
35
+ globs: "**/*.md"
@@ -0,0 +1,16 @@
1
+ # markdownlint-cli2 configuration — see https://github.com/DavidAnson/markdownlint
2
+ config:
3
+ MD013: false # line length (tables, code and long prose)
4
+ MD033: false # inline HTML (badges, <img>)
5
+ MD041: false # first line need not be a top-level heading (badges)
6
+ MD024:
7
+ siblings_only: true # repeated headings OK across different sections
8
+ MD046: false # allow both indented and fenced code blocks
9
+ MD060: false # don't enforce table pipe spacing style
10
+ globs:
11
+ - "**/*.md"
12
+ ignores:
13
+ - "site"
14
+ - ".pixi"
15
+ - "PUBLIC"
16
+ - "**/node_modules"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: patchworks
3
- Version: 0.8.1
3
+ Version: 0.9.0
4
4
  Summary: Tiled processing of arbitrarily large images with globally consistent labels
5
5
  Project-URL: Homepage, https://github.com/imcf/patchworks
6
6
  Project-URL: Issues, https://github.com/imcf/patchworks/issues
@@ -73,7 +73,7 @@ Description-Content-Type: text/markdown
73
73
 
74
74
  > Tiled processing of arbitrarily large images — any image, any function.
75
75
 
76
- ```
76
+ ```text
77
77
  ┌──────┬──────┬──────┐ fn(tile) → labels ┌──────┬──────┬──────┐
78
78
  │ tile │ tile │ tile │ ─────────────────────► │ 1 │ 2 │ 3 │
79
79
  ├──────┼──────┼──────┤ ├──────┼──────┼──────┤
@@ -361,6 +361,7 @@ Full docs, guides and tutorials: **<https://imcf.one/patchworks/>**
361
361
  - dask[array], numpy, zarr, scipy
362
362
 
363
363
  Optional:
364
+
364
365
  - `psutil` — accurate RAM sizing for `tile_shape="auto"`
365
366
  - `nvidia-ml-py` — accurate GPU VRAM sizing
366
367
  - `tqdm` — progress bars
@@ -7,7 +7,7 @@
7
7
 
8
8
  > Tiled processing of arbitrarily large images — any image, any function.
9
9
 
10
- ```
10
+ ```text
11
11
  ┌──────┬──────┬──────┐ fn(tile) → labels ┌──────┬──────┬──────┐
12
12
  │ tile │ tile │ tile │ ─────────────────────► │ 1 │ 2 │ 3 │
13
13
  ├──────┼──────┼──────┤ ├──────┼──────┼──────┤
@@ -295,6 +295,7 @@ Full docs, guides and tutorials: **<https://imcf.one/patchworks/>**
295
295
  - dask[array], numpy, zarr, scipy
296
296
 
297
297
  Optional:
298
+
298
299
  - `psutil` — accurate RAM sizing for `tile_shape="auto"`
299
300
  - `nvidia-ml-py` — accurate GPU VRAM sizing
300
301
  - `tqdm` — progress bars
@@ -47,18 +47,18 @@ tile_process(
47
47
  Load the model **outside** the `fn` closure. If you load it inside,
48
48
  it will be re-initialised (and potentially re-downloaded) once per tile.
49
49
 
50
- For distributed execution, use `functools.partial` with a cached model:
50
+ For distributed execution, use `functools.partial` with a cached model:
51
51
 
52
- ```python
53
- from functools import lru_cache
52
+ ```python
53
+ from functools import lru_cache
54
54
 
55
55
 
56
- @lru_cache(maxsize=1)
57
- def _get_model():
58
- return StarDist2D.from_pretrained("2D_versatile_fluo")
56
+ @lru_cache(maxsize=1)
57
+ def _get_model():
58
+ return StarDist2D.from_pretrained("2D_versatile_fluo")
59
59
 
60
60
 
61
- def stardist_fn(tile):
62
- model = _get_model()
63
- ...
64
- ```
61
+ def stardist_fn(tile):
62
+ model = _get_model()
63
+ ...
64
+ ```
@@ -46,11 +46,11 @@ patchworks can be installed from PyPI on all operating systems, for Python ≥ 3
46
46
 
47
47
  ## The one function you need
48
48
 
49
- ```python
50
- from patchworks import tile_process
49
+ ```python
50
+ from patchworks import tile_process
51
51
 
52
- result = tile_process(image, fn)
53
- ```
52
+ result = tile_process(image, fn)
53
+ ```
54
54
 
55
55
  `tile_process(image, fn)` splits `image` into tiles, runs `fn` on each tile,
56
56
  and returns a globally consistent label array.
@@ -65,17 +65,17 @@ and returns a globally consistent label array.
65
65
  patchworks is method-agnostic. Your function receives a NumPy array (one tile)
66
66
  and must return an integer label array of the same shape:
67
67
 
68
- ```python
69
- import numpy as np
68
+ ```python
69
+ import numpy as np
70
70
 
71
71
 
72
- def my_fn(tile: np.ndarray) -> np.ndarray:
73
- from skimage.filters import threshold_otsu
74
- from skimage.measure import label
72
+ def my_fn(tile: np.ndarray) -> np.ndarray:
73
+ from skimage.filters import threshold_otsu
74
+ from skimage.measure import label
75
75
 
76
- binary = tile > threshold_otsu(tile)
77
- return label(binary).astype("int32")
78
- ```
76
+ binary = tile > threshold_otsu(tile)
77
+ return label(binary).astype("int32")
78
+ ```
79
79
 
80
80
  The function is called independently on every tile. patchworks ensures that
81
81
  objects spanning tile boundaries are merged into a single label.
@@ -155,14 +155,14 @@ objects spanning tile boundaries are merged into a single label.
155
155
  Methods like Cellpose and StarDist need spatial context at tile boundaries.
156
156
  Use `overlap` (in voxels) so boundary objects are fully visible:
157
157
 
158
- ```python
159
- result = tile_process(
160
- "image.zarr",
161
- my_fn,
162
- tile_shape=(1, 2048, 2048),
163
- overlap=20, # 20-voxel halo on every side
164
- )
165
- ```
158
+ ```python
159
+ result = tile_process(
160
+ "image.zarr",
161
+ my_fn,
162
+ tile_shape=(1, 2048, 2048),
163
+ overlap=20, # 20-voxel halo on every side
164
+ )
165
+ ```
166
166
 
167
167
  !!! info "How overlap works"
168
168
  Each tile is expanded by `overlap` voxels on every side before calling `fn`.
@@ -173,22 +173,22 @@ result = tile_process(
173
173
 
174
174
  ## Use Cellpose
175
175
 
176
- ```python
177
- from patchworks import tile_process
178
- from patchworks.plugins.cellpose import cellpose_fn
179
-
180
- fn = cellpose_fn("cyto3", gpu=True, diameter=30)
181
-
182
- tile_process(
183
- "image.zarr",
184
- fn,
185
- channel=0,
186
- tile_shape=(1, 2048, 2048),
187
- overlap=20,
188
- write_to="labels.zarr",
189
- progress=True,
190
- )
191
- ```
176
+ ```python
177
+ from patchworks import tile_process
178
+ from patchworks.plugins.cellpose import cellpose_fn
179
+
180
+ fn = cellpose_fn("cyto3", gpu=True, diameter=30)
181
+
182
+ tile_process(
183
+ "image.zarr",
184
+ fn,
185
+ channel=0,
186
+ tile_shape=(1, 2048, 2048),
187
+ overlap=20,
188
+ write_to="labels.zarr",
189
+ progress=True,
190
+ )
191
+ ```
192
192
 
193
193
  See the [Cellpose 2-D example](examples/cellpose_2d.md) for the full workflow.
194
194
 
@@ -60,7 +60,7 @@ in the same process as the kernel. When your segmentation function holds the
60
60
  Python GIL (every PyTorch/CUDA `eval` does), the worker thread can't send
61
61
  heartbeats. The scheduler declares it dead, and the merge fails:
62
62
 
63
- ```
63
+ ```text
64
64
  FutureCancelledError: lost dependencies
65
65
  ```
66
66
 
@@ -9,7 +9,7 @@ even though it's the same cell.
9
9
 
10
10
  patchworks solves this with a zarr-native merge algorithm:
11
11
 
12
- ```
12
+ ```text
13
13
  Tile A labels: Tile B labels: After merge:
14
14
  ┌────────────┐ ┌────────────┐ ┌──────────────────────┐
15
15
  │ 3 1 2 │ │ 1 4 2 │ │ 3 1 2 │ 501 5 502│
@@ -32,7 +32,7 @@ Each tile's labels are written to a temporary zarr once. This is critical:
32
32
  without staging, any downstream operation that reads the label array re-runs
33
33
  your segmentation function. The merge internally reads labels multiple times.
34
34
 
35
- ```
35
+ ```text
36
36
  tile_process calls fn once per tile → staged zarr
37
37
 
38
38
  merge reads from staged zarr (no fn calls)
@@ -76,6 +76,28 @@ and streaming the downsampled result out through dask with bounded chunks. The
76
76
  graph never chains level-on-level and no whole plane/volume is held in RAM, so
77
77
  terabyte images convert in bounded memory.
78
78
 
79
+ ### Sharding (fewer files)
80
+
81
+ A big array becomes tens of thousands of tiny chunk files, which strain
82
+ filesystems and object stores. Sharding packs many chunks into one **shard**
83
+ file (zarr v3), cutting the file count ~100×:
84
+
85
+ ```python
86
+ to_ome_zarr("scan.ims", "scan.zarr", shard=True) # auto ~512 MB shards
87
+ to_ome_zarr("scan.ims", "scan.zarr", shard=(1, 16, 2048, 2048)) # explicit
88
+ ```
89
+
90
+ Default is `shard=False` for maximum reader compatibility — sharding is
91
+ zarr-v3-only, so older tools may not be able to read it (a zarr-v3-capable zarr/napari stack does).
92
+ A sharded write holds ~one shard per worker in RAM, so very large shards cost
93
+ memory.
94
+
95
+ ### Progress
96
+
97
+ All write steps show a dask progress bar **by default** (`progress=True`), so
98
+ you can see how long a conversion will take. Pass `progress=False` to silence
99
+ it.
100
+
79
101
  !!! note "Install the readers you need"
80
102
  `pip install "patchworks[bioio]"` pulls `bioio` plus the `bioio-bioformats`
81
103
  catch-all reader (needs a JVM). For speed, add native readers for your
@@ -30,7 +30,7 @@ single-GPU runs — patchworks pins it to 1 thread automatically).
30
30
 
31
31
  patchworks detects in-process clients at startup and raises immediately:
32
32
 
33
- ```
33
+ ```text
34
34
  RuntimeError: Active Dask client uses an in-process worker (processes=False).
35
35
  This breaks the label merge when fn holds the GIL. Use a process-based
36
36
  cluster instead:
@@ -88,6 +88,6 @@ tile_process(
88
88
  After a `tile_process` run with `skip_empty=True`, the log reports exactly
89
89
  how many tiles ran your function:
90
90
 
91
- ```
91
+ ```text
92
92
  INFO patchworks._core: skip_empty: 486/2200 tiles ran fn, 1714 skipped (max<=412.0)
93
93
  ```
@@ -13,6 +13,7 @@ peak RAM during segmentation is approximately one tile's worth of data.
13
13
  ## Choosing a tile size
14
14
 
15
15
  The right tile size depends on:
16
+
16
17
  - Your available RAM (or GPU VRAM)
17
18
  - The minimum context your segmentation method needs (objects should fit fully
18
19
  inside a tile, or you need overlap)
@@ -62,7 +63,7 @@ Methods that need spatial context (Cellpose, StarDist, U-Net) produce wrong
62
63
  results near tile edges: objects at the boundary are cut off. Overlap fixes this
63
64
  by expanding each tile by `overlap` voxels on every side.
64
65
 
65
- ```
66
+ ```text
66
67
  No overlap: With overlap=20:
67
68
  ┌──────────┐ ┌──────────────────┐
68
69
  │ │ │ ░░░░░░░░░░░░░░ │
@@ -86,4 +87,4 @@ No overlap: With overlap=20:
86
87
  automatically clips the depth per axis, so z-tiles of size 1 (typical in
87
88
  2-D Cellpose mode) get `depth=0` in z even if you pass `overlap=20`.
88
89
 
89
- Axes that are too small for the requested overlap simply get a smaller halo.
90
+ Axes that are too small for the requested overlap simply get a smaller halo.
@@ -2,7 +2,7 @@
2
2
 
3
3
  **Tiled processing of arbitrarily large images — any image, any function.**
4
4
 
5
- ```
5
+ ```text
6
6
  ┌──────┬──────┬──────┐ ┌──────┬──────┬──────┐
7
7
  │ │ │ │ fn(tile) → IDs │ 1 │ 2 │ 3 │
8
8
  │ │ │ │ ───────────────► │ │ │ │
@@ -49,6 +49,14 @@ _GPU_MEMORY_FALLBACK = 8 * 1024**3
49
49
 
50
50
 
51
51
  def _get_available_memory() -> int:
52
+ """Return available system RAM in bytes.
53
+
54
+ Returns
55
+ -------
56
+ int
57
+ Available memory via ``psutil``, or an 8 GiB fallback if it is not
58
+ installed.
59
+ """
52
60
  try:
53
61
  import psutil
54
62
 
@@ -103,7 +111,14 @@ def safe_worker_count(
103
111
 
104
112
 
105
113
  def _get_gpu_memory() -> int:
106
- """Return free GPU VRAM in bytes. Falls back to 8 GiB default."""
114
+ """Return free GPU VRAM in bytes.
115
+
116
+ Returns
117
+ -------
118
+ int
119
+ Free VRAM of GPU 0 via ``nvidia-ml-py``, or an 8 GiB fallback if the
120
+ query fails.
121
+ """
107
122
  try:
108
123
  import pynvml
109
124
 
@@ -9,7 +9,14 @@ logger = logging.getLogger(__name__)
9
9
 
10
10
 
11
11
  def _distributed_client():
12
- """Return the active dask.distributed Client, or None."""
12
+ """Return the active dask.distributed Client, or None.
13
+
14
+ Returns
15
+ -------
16
+ distributed.Client or None
17
+ The current client, or ``None`` if none is active / distributed is not
18
+ installed.
19
+ """
13
20
  try:
14
21
  from dask.distributed import get_client
15
22
 
@@ -19,12 +26,22 @@ def _distributed_client():
19
26
 
20
27
 
21
28
  def _client_is_in_process(client) -> bool:
22
- """True if *client* runs its worker in this process (processes=False).
29
+ """Whether *client* runs its worker in this process (``processes=False``).
23
30
 
24
31
  An in-process worker shares the GIL. A long task that holds the GIL
25
32
  (e.g. a Cellpose/torch eval) starves the worker heartbeat, the scheduler
26
33
  declares it dead, and the P2P merge barrier drops its inputs →
27
34
  "FutureCancelledError: lost dependencies".
35
+
36
+ Parameters
37
+ ----------
38
+ client : distributed.Client
39
+ The client to inspect.
40
+
41
+ Returns
42
+ -------
43
+ bool
44
+ True if any worker address uses the ``inproc://`` transport.
28
45
  """
29
46
  try:
30
47
  for addr in client.scheduler_info().get("workers", {}):
@@ -23,7 +23,23 @@ logger = logging.getLogger(__name__)
23
23
  def _stage_to_zarr(
24
24
  arr: da.Array, path: str, component: str, show_progress: bool
25
25
  ) -> None:
26
- """Write *arr* to zarr *path/component*, never loading it into RAM."""
26
+ """Write *arr* to zarr ``path/component``, never loading it into RAM.
27
+
28
+ Parameters
29
+ ----------
30
+ arr : da.Array
31
+ Array to materialise to disk.
32
+ path : str
33
+ Zarr store path.
34
+ component : str
35
+ Array name within the store.
36
+ show_progress : bool
37
+ Show a progress bar while computing.
38
+
39
+ Returns
40
+ -------
41
+ None
42
+ """
27
43
  import dask
28
44
 
29
45
  lazy_write = arr.to_zarr(
@@ -57,7 +73,7 @@ def tile_process(
57
73
  level: int = 0,
58
74
  use_gpu: bool = False,
59
75
  max_workers: int | None = None,
60
- progress: bool = False,
76
+ progress: bool = True,
61
77
  write_to: Union[str, Path, None] = None,
62
78
  output_component: str = "labels",
63
79
  pyramid_levels: int = 5,
@@ -123,7 +139,8 @@ def tile_process(
123
139
  every core. Ignored when a distributed client is active (it manages its
124
140
  own concurrency).
125
141
  progress:
126
- Show a progress bar during the tile-writing and relabel steps.
142
+ Show progress bars for staging, the label write and the pyramid
143
+ (default ``True``). Set ``False`` to silence them.
127
144
  write_to:
128
145
  Explicit output zarr store path. Overrides the default behaviour: the
129
146
  merged labels are written here as a single-resolution array named
@@ -297,6 +314,20 @@ def tile_process(
297
314
  _skip_thr = _auto_empty_threshold(image_for_threshold, channel, level)
298
315
 
299
316
  def active_fn(block, block_info=None):
317
+ """Run *fn* on one tile, or return zeros for an empty tile.
318
+
319
+ Parameters
320
+ ----------
321
+ block : np.ndarray
322
+ One image tile.
323
+ block_info : dict or None
324
+ Dask block metadata (used for logging the tile location).
325
+
326
+ Returns
327
+ -------
328
+ np.ndarray
329
+ Integer labels, or an all-zero tile when skipped.
330
+ """
300
331
  loc = block_info[0].get("chunk-location") if block_info else "?"
301
332
  if skip_empty and block.size and block.max() <= _skip_thr:
302
333
  if verbose:
@@ -398,6 +429,12 @@ def tile_process(
398
429
  # that figure instead.
399
430
 
400
431
  def _cleanup_stage():
432
+ """Delete the temporary stage store unless ``keep_stage`` is set.
433
+
434
+ Returns
435
+ -------
436
+ None
437
+ """
401
438
  if not keep_stage:
402
439
  import shutil
403
440
 
@@ -457,6 +494,7 @@ def tile_process(
457
494
  name=output_component,
458
495
  n_levels=pyramid_levels,
459
496
  downscale=pyramid_downscale,
497
+ progress=progress,
460
498
  overwrite=True,
461
499
  )
462
500
  shutil.rmtree(os.path.dirname(_merge_out), ignore_errors=True)
@@ -75,6 +75,16 @@ def _otsu_threshold(sample: np.ndarray) -> float:
75
75
 
76
76
  Operates on the full distribution including zeros — zeros are background
77
77
  pixels and must be included so Otsu can find the signal/background boundary.
78
+
79
+ Parameters
80
+ ----------
81
+ sample : np.ndarray
82
+ Flat intensity sample.
83
+
84
+ Returns
85
+ -------
86
+ float
87
+ The Otsu threshold, or ``0.0`` when the sample is degenerate.
78
88
  """
79
89
  try:
80
90
  from skimage.filters import threshold_otsu
@@ -89,7 +99,22 @@ def _otsu_threshold(sample: np.ndarray) -> float:
89
99
  def _auto_empty_threshold(
90
100
  image: da.Array, channel: int | None, level: int
91
101
  ) -> float:
92
- """Pick an empty-tile threshold from a cheap bounded sample (Otsu)."""
102
+ """Pick an empty-tile threshold from a cheap bounded sample (Otsu).
103
+
104
+ Parameters
105
+ ----------
106
+ image : da.Array
107
+ Image to sample.
108
+ channel : int or None
109
+ Channel hint (kept for signature symmetry).
110
+ level : int
111
+ Pyramid level hint (kept for signature symmetry).
112
+
113
+ Returns
114
+ -------
115
+ float
116
+ Otsu threshold over a few small centred windows.
117
+ """
93
118
  n = image.ndim
94
119
  win = [min(64 if i >= n - 3 else s, s) for i, s in enumerate(image.shape)]
95
120
  win = [min(w, 256) if i >= n - 2 else w for i, w in enumerate(win)]
@@ -55,6 +55,25 @@ _merge_out_comp: "str | None" = None
55
55
 
56
56
 
57
57
  def _init_worker(lut_path, staged_path, staged_comp, out_path, out_comp):
58
+ """Initialise a merge worker process with the shared paths and LUT.
59
+
60
+ Parameters
61
+ ----------
62
+ lut_path : str
63
+ Path to the relabel lookup table (loaded memory-mapped, read-only).
64
+ staged_path : str
65
+ Path to the staged-labels zarr store.
66
+ staged_comp : str
67
+ Component name within the staged store.
68
+ out_path : str
69
+ Path to the output zarr store.
70
+ out_comp : str
71
+ Component name within the output store.
72
+
73
+ Returns
74
+ -------
75
+ None
76
+ """
58
77
  global _merge_lut, _merge_lut_path, _merge_staged_path, _merge_staged_comp
59
78
  global _merge_out_path, _merge_out_comp
60
79
  _merge_lut = np.load(
@@ -68,6 +87,17 @@ def _init_worker(lut_path, staged_path, staged_comp, out_path, out_comp):
68
87
 
69
88
 
70
89
  def _relabel_chunk_worker(chunk_slice: tuple) -> None:
90
+ """Apply the relabel LUT to one chunk and write it to the output store.
91
+
92
+ Parameters
93
+ ----------
94
+ chunk_slice : tuple
95
+ The slice selecting this chunk in both stores.
96
+
97
+ Returns
98
+ -------
99
+ None
100
+ """
71
101
  src = zarr.open_group(_merge_staged_path, mode="r")[_merge_staged_comp]
72
102
  dst = zarr.open_group(_merge_out_path, mode="r+")[_merge_out_comp]
73
103
  block = np.asarray(src[chunk_slice], dtype=np.int64)
@@ -87,6 +117,20 @@ def _relabel_chunk_worker(chunk_slice: tuple) -> None:
87
117
  def _boundary_face_specs(
88
118
  shape: tuple[int, ...], chunk_shape: tuple[int, ...]
89
119
  ) -> list[tuple[int, int]]:
120
+ """Enumerate interior chunk boundaries to scan for touching labels.
121
+
122
+ Parameters
123
+ ----------
124
+ shape : tuple of int
125
+ Array shape.
126
+ chunk_shape : tuple of int
127
+ Chunk shape.
128
+
129
+ Returns
130
+ -------
131
+ list of tuple of int
132
+ ``(axis, position)`` pairs, one per interior chunk boundary.
133
+ """
90
134
  specs = []
91
135
  for ax, (s, cs) in enumerate(zip(shape, chunk_shape)):
92
136
  pos = cs
@@ -105,6 +149,21 @@ def _scan_touching_pairs(
105
149
  is bounded to one chunk (~200 MB). Reading the full face at once
106
150
  (slice(None) on face axes) would allocate face_area × 8 bytes in one shot —
107
151
  e.g. 37888 × 27392 × 8 = 8 GiB for a single z-face (OOM on real datasets).
152
+
153
+ Parameters
154
+ ----------
155
+ zarr_path : str
156
+ Path to the staged-labels zarr store.
157
+ component : str
158
+ Component name within the store.
159
+ chunk_shape : tuple of int
160
+ Chunk shape (sets the per-read column size).
161
+
162
+ Returns
163
+ -------
164
+ np.ndarray
165
+ ``(N, 2)`` int64 array of unique label pairs touching across a
166
+ boundary.
108
167
  """
109
168
  root = zarr.open_group(zarr_path, mode="r")
110
169
  arr = root[component]
@@ -135,7 +194,20 @@ def _scan_touching_pairs(
135
194
 
136
195
 
137
196
  def _build_relabel_lut(pairs: np.ndarray, max_label: int) -> np.ndarray:
138
- """Touching-pairs scipy connected components relabeling LUT."""
197
+ """Build a relabel LUT from touching pairs via connected components.
198
+
199
+ Parameters
200
+ ----------
201
+ pairs : np.ndarray
202
+ ``(N, 2)`` array of touching label pairs.
203
+ max_label : int
204
+ Largest label id present.
205
+
206
+ Returns
207
+ -------
208
+ np.ndarray
209
+ Lookup table mapping each old label to its merged (component) id.
210
+ """
139
211
  if max_label > _LUT_WARN_THRESHOLD:
140
212
  logger.warning(
141
213
  "_build_relabel_lut: max_label=%d → LUT ~%.0f MB. "
@@ -168,6 +240,24 @@ def _build_relabel_lut(pairs: np.ndarray, max_label: int) -> np.ndarray:
168
240
  def _create_zarr_label_array(
169
241
  group: zarr.Group, name: str, shape: tuple, chunks: tuple
170
242
  ) -> zarr.Array:
243
+ """Create (replacing any existing) an int32 label array in *group*.
244
+
245
+ Parameters
246
+ ----------
247
+ group : zarr.Group
248
+ Parent group.
249
+ name : str
250
+ Array name (may be a nested path).
251
+ shape : tuple
252
+ Array shape.
253
+ chunks : tuple
254
+ Chunk shape.
255
+
256
+ Returns
257
+ -------
258
+ zarr.Array
259
+ The newly created array (works on zarr v2 and v3).
260
+ """
171
261
  if name in group:
172
262
  del group[name]
173
263
  if _ZARR_V3:
@@ -192,6 +282,25 @@ def zarr_native_merge(
192
282
  Scales to 2000+ chunks where the dask_image approach stalls (O(n_chunks²)
193
283
  graph). Reads *staged_path/staged_component*, merges touching cross-boundary
194
284
  labels, writes result to *out_path/out_component*. No dask task graph.
285
+
286
+ Parameters
287
+ ----------
288
+ staged_path : str
289
+ Path to the staged-labels zarr store.
290
+ staged_component : str
291
+ Component name within the staged store.
292
+ out_path : str
293
+ Path to the output zarr store.
294
+ out_component : str
295
+ Component name within the output store.
296
+ n_workers : int
297
+ Number of worker processes for the parallel relabel.
298
+ show_progress : bool
299
+ Show a progress bar over the relabel chunks.
300
+
301
+ Returns
302
+ -------
303
+ None
195
304
  """
196
305
  root = zarr.open_group(staged_path, mode="r")
197
306
  arr = root[staged_component]