rmcontrols 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rmcontrols/__init__.py ADDED
@@ -0,0 +1,46 @@
1
+ """rmcontrols — detect and flag control tissues in IHC thumbnail images."""
2
+
3
+ from ._extract import ExtractResult, extract_thumbnails
4
+ from ._hooks import DetectionHooks
5
+ from ._region import ControlRegion
6
+ from ._s3 import (
7
+ glob_s3,
8
+ load_mrxs_thumbnail_from_s3,
9
+ load_slide_thumbnail_from_s3,
10
+ open_slide_thumbnail,
11
+ )
12
+ from ._validation import (
13
+ validate_control_split_x,
14
+ validate_control_split_x_batch,
15
+ validate_control_split_x_wsi,
16
+ )
17
+ from .detector import detect_controls, detect_controls_debug
18
+ from .viz import visualize, visualize_debug
19
+
20
__version__ = "0.1.0"

# Public API of the package: every name below is imported at the top of this
# module from one of the submodules and is what ``from rmcontrols import *``
# exposes.  Entries are grouped by purpose.
__all__ = [
    # Detection
    "detect_controls",
    "detect_controls_debug",
    # Data classes
    "ControlRegion",
    "DetectionHooks",
    # Rendering
    "visualize",
    "visualize_debug",
    # Interactive validation
    "validate_control_split_x",
    "validate_control_split_x_batch",
    "validate_control_split_x_wsi",
    # Thumbnail extraction
    "extract_thumbnails",
    "ExtractResult",
    # S3 / slide reader
    "glob_s3",
    "load_slide_thumbnail_from_s3",
    "load_mrxs_thumbnail_from_s3",  # backwards-compat alias
    "open_slide_thumbnail",
    # Package metadata
    "__version__",
]
rmcontrols/_blobs.py ADDED
@@ -0,0 +1,110 @@
1
+ """Connected-component blob extraction from binary tissue masks."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import numpy as np
6
+ from scipy import ndimage
7
+
8
+ from ._types import BlobDict
9
+
10
+
11
def _extract_blobs(mask: np.ndarray, min_area: int) -> list[BlobDict]:
    """Label the connected components of *mask* and keep the large ones.

    Components are found with :func:`scipy.ndimage.label` and located with
    :func:`scipy.ndimage.find_objects`, so each component is compared against
    the label image only inside its own bounding box instead of across the
    whole array.

    Parameters
    ----------
    mask : np.ndarray, shape (H, W), dtype bool
        Binary tissue mask.
    min_area : int
        Minimum pixel count; components smaller than this are dropped.

    Returns
    -------
    list of BlobDict
        One entry per retained component, in label order, with ``blob_id``
        (the component label), ``pixels`` (an ``(N, 2)`` array of full-image
        ``(row, col)`` coordinates), ``area``, ``bbox`` as
        ``(x, y, width, height)``, ``centroid`` as ``(row, col)`` and
        ``role`` (always ``"main"``).  ``features`` is left unset; see
        :func:`~rmcontrols._features._shape_features`.
    """
    label_image, _ = ndimage.label(mask)
    kept: list[BlobDict] = []

    for label, window in enumerate(ndimage.find_objects(label_image), start=1):
        if window is None:
            continue

        row_span, col_span = window
        # The bounding box may contain pixels of other components, so select
        # only the pixels carrying this component's label.
        member = label_image[window] == label
        n_pixels = int(member.sum())
        if n_pixels < min_area:
            continue

        top, left = row_span.start, col_span.start
        local_rows, local_cols = np.where(member)
        # Shift window-local indices back into full-image coordinates.
        coords = np.column_stack([local_rows + top, local_cols + left])
        row_mean = float(coords[:, 0].mean())
        col_mean = float(coords[:, 1].mean())

        blob = {
            "blob_id": label,
            "pixels": coords,
            "area": n_pixels,
            # Slice stops are exclusive, so stop - start is the extent.
            "bbox": (left, top, col_span.stop - left, row_span.stop - top),
            "centroid": (row_mean, col_mean),
            "role": "main",
        }
        kept.append(blob)

    return kept
65
+
66
+
67
def _perimeter(pixels: np.ndarray) -> int:
    """Return how many of a blob's pixels lie on its boundary.

    The pixel list is rasterised into its tight bounding-box mask and eroded
    once with :func:`scipy.ndimage.binary_erosion` (default settings).  A
    pixel that belongs to the blob but does not survive the erosion is
    counted as boundary.

    Parameters
    ----------
    pixels : np.ndarray, shape (N, 2), dtype int
        Row/column coordinates of every foreground pixel (full-image frame).

    Returns
    -------
    int
        Number of boundary pixels.
    """
    blob_mask = _pixels_to_mask(pixels)
    interior = ndimage.binary_erosion(blob_mask)
    edge = blob_mask & ~interior
    return int(edge.sum())
87
+
88
+
89
def _pixels_to_mask(pixels: np.ndarray) -> np.ndarray:
    """Rasterise a list of pixel coordinates into a tight boolean mask.

    The mask spans exactly the bounding box of *pixels*: element ``[0, 0]``
    corresponds to the smallest row and smallest column present in the input.

    Parameters
    ----------
    pixels : np.ndarray, shape (N, 2), dtype int
        Row/column coordinates in any reference frame.

    Returns
    -------
    np.ndarray, dtype bool
        Local boolean mask of shape ``(max_row - min_row + 1,
        max_col - min_col + 1)``, ``True`` at every listed pixel.
    """
    row_coords = pixels[:, 0]
    col_coords = pixels[:, 1]

    top = int(row_coords.min())
    left = int(col_coords.min())
    height = int(row_coords.max()) - top + 1
    width = int(col_coords.max()) - left + 1

    mask = np.zeros((height, width), dtype=bool)
    # Translate to mask-local indices before marking the foreground.
    mask[row_coords - top, col_coords - left] = True
    return mask
rmcontrols/_build.py ADDED
@@ -0,0 +1,41 @@
1
+ """Build public ControlRegion objects from raw blob dicts."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from ._region import ControlRegion
6
+ from ._types import BlobDict
7
+
8
+
9
def _build_regions(blobs: list[BlobDict]) -> list[ControlRegion]:
    """Turn accepted control blobs into public ``ControlRegion`` objects.

    Blobs are ordered by centroid column, with centroid row as tie-breaker,
    so labels run left-to-right no matter in which order the connected
    components were labelled.

    Parameters
    ----------
    blobs : list of BlobDict
        Accepted control blobs (role ``'strip_control'`` or
        ``'strip_proximity'``).  Each must provide ``centroid``, ``bbox``
        and ``area``; a missing ``features`` entry is replaced by an empty
        dict.

    Returns
    -------
    list of ControlRegion
        One :class:`~rmcontrols.ControlRegion` per blob, labelled
        ``'control_0'``, ``'control_1'``, ... in centroid-column order, with
        ``area_px``, ``centroid`` and ``features`` stored in ``metadata``.
    """

    def _column_then_row(blob: BlobDict) -> tuple[float, float]:
        # Centroids are indexed [0] = row, [1] = column; sort on column first.
        centroid = blob["centroid"]
        return (centroid[1], centroid[0])

    regions: list[ControlRegion] = []
    for index, blob in enumerate(sorted(blobs, key=_column_then_row)):
        bbox = blob["bbox"]
        details = {
            "area_px": blob["area"],
            "centroid": blob["centroid"],
            "features": blob.get("features", {}),
        }
        regions.append(
            ControlRegion(label=f"control_{index}", bbox=bbox, metadata=details)
        )
    return regions
@@ -0,0 +1,130 @@
1
+ """CLI entry point for parallel thumbnail extraction from whole-slide images.
2
+
3
+ Entry point
4
+ -----------
5
+ ``rmcontrols-extract-thumbnails``
6
+ Resolve a local or S3 glob pattern, extract a downsampled thumbnail
7
+ from each matched slide in parallel, and save the results as PNG (or
8
+ other Pillow-supported) image files.
9
+
10
+ Examples
11
+ --------
12
+ ::
13
+
14
+ # Local slides (non-recursive: only files directly inside slides/)
15
+ rmcontrols-extract-thumbnails "slides/*.svs" --output-dir thumbnails/
16
+
17
+ # MIRAX slides with a larger thumbnail size and more worker threads
18
+ rmcontrols-extract-thumbnails "slides/*.mrxs" --output-dir thumbnails/ \\
19
+ --thumbnail-size 2000 --workers 8
20
+
21
+ # S3 slides
22
+ rmcontrols-extract-thumbnails "s3://my-bucket/slides/*.svs" \\
23
+ --output-dir thumbnails/
24
+
25
+ # S3 with explicit AWS profile and JPEG output
26
+ rmcontrols-extract-thumbnails "s3://my-bucket/slides/*.ndpi" \\
27
+ --output-dir thumbnails/ --format jpeg --aws-profile my-profile
28
+ """
29
+
30
+ from __future__ import annotations
31
+
32
+ import argparse
33
+ import sys
34
+ from pathlib import Path
35
+
36
+
37
def main(argv: list[str] | None = None) -> None:
    """Entry point for ``rmcontrols-extract-thumbnails``.

    Parses the command line, passes the glob pattern and options to
    ``extract_thumbnails``, prints one status line per slide and finishes
    with a summary line.

    Parameters
    ----------
    argv : list of str, optional
        Argument vector without the program name.  ``None`` makes
        :mod:`argparse` read ``sys.argv[1:]``.

    Raises
    ------
    SystemExit
        With the exception message when ``extract_thumbnails`` raises
        :class:`ValueError`, and with status ``1`` when at least one slide
        failed.  ``--help`` and argument errors exit through
        :mod:`argparse`.
    """
    parser = argparse.ArgumentParser(
        prog="rmcontrols-extract-thumbnails",
        description=(
            "Extract downsampled thumbnails from whole-slide images matched"
            " by a glob pattern (local or S3). Slides are processed in parallel."
        ),
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "glob",
        help=(
            "Glob pattern for slide files, e.g. 'slides/*.svs' or"
            " 's3://bucket/slides/*.mrxs'. Quote to prevent shell expansion."
            " The pattern is non-recursive: '*' does not cross '/' boundaries."
        ),
    )
    parser.add_argument(
        "--output-dir",
        "-o",
        type=Path,
        default=Path("thumbnails"),
        metavar="DIR",
        help="Directory where extracted thumbnails are written.",
    )
    parser.add_argument(
        "--thumbnail-size",
        type=int,
        default=1000,
        metavar="PX",
        help="Maximum side length (width or height) of each thumbnail in pixels.",
    )
    parser.add_argument(
        "--workers",
        "-j",
        type=int,
        default=4,
        metavar="N",
        help="Number of parallel worker threads.",
    )
    parser.add_argument(
        "--format",
        dest="fmt",
        default="png",
        metavar="FMT",
        help="Output image format accepted by Pillow (e.g. png, jpeg).",
    )
    parser.add_argument(
        "--overwrite",
        action="store_true",
        help="Overwrite existing output files. Without this flag, already-extracted slides are skipped.",
    )
    parser.add_argument(
        "--aws-profile",
        default=None,
        metavar="PROFILE",
        help="Boto3 AWS profile name for S3 access. Uses the default credential chain when omitted.",
    )
    args = parser.parse_args(argv)

    # Imported after argument parsing so ``--help`` and usage errors do not
    # need to load the extraction module.
    from ._extract import extract_thumbnails

    try:
        results = extract_thumbnails(
            args.glob,
            output_dir=args.output_dir,
            thumbnail_size=args.thumbnail_size,
            workers=args.workers,
            fmt=args.fmt,
            overwrite=args.overwrite,
            aws_profile=args.aws_profile,
        )
    except ValueError as exc:
        sys.exit(str(exc))

    # Classify each result exactly once.  ``error is None`` means success;
    # any other value is either a skip (recognised only by the
    # "already exists" text in the message) or a failure.  Testing the error
    # for truthiness instead would drop a result whose error is an empty
    # string from the report, the counts and the exit status.
    ok = []
    skipped = []
    failed = []
    for result in results:
        if result.error is None:
            ok.append(result)
        elif "already exists" in result.error:
            skipped.append(result)
        else:
            failed.append(result)

    for r in ok:
        print(f"  [ok]      {r.slide_path}  →  {r.output_path}")
    for r in skipped:
        print(f"  [skipped] {r.slide_path}  (output exists, use --overwrite)")
    for r in failed:
        print(f"  [failed]  {r.slide_path}  —  {r.error}", file=sys.stderr)

    print(
        f"\nDone: {len(ok)} extracted, {len(skipped)} skipped, {len(failed)} failed"
        f"  →  {args.output_dir}"
    )

    if failed:
        sys.exit(1)
@@ -0,0 +1,287 @@
1
+ """CLI entry points for interactive batch validation of control_split_x.
2
+
3
+ Entry points
4
+ ------------
5
+ ``rmcontrols-validate-thumbnails``
6
+ Batch-validate control_split_x over a glob of thumbnail images.
7
+
8
+ ``rmcontrols-validate-slides``
9
+ Batch-validate control_split_x over a glob of whole-slide image files,
10
+ loading thumbnails via OpenSlide (local) or S3.
11
+
12
+ Both commands write a JSON results file on exit. The default output path is
13
+ ``./outputs/<command>.json`` (the directory is created automatically).
14
+ Use ``--overwrite`` to replace an existing file; without it the command
15
+ aborts with a warning.
16
+
17
+ Examples
18
+ --------
19
+ ::
20
+
21
+ rmcontrols-validate-thumbnails "assets/*.png" --side left
22
+
23
+ rmcontrols-validate-thumbnails "assets/*.png" --side left \\
24
+ --output results.json --overwrite
25
+
26
+ rmcontrols-validate-slides "slides/*.mrxs" --side left \\
27
+ --thumbnail-size 1000
28
+ """
29
+
30
+ from __future__ import annotations
31
+
32
+ import argparse
33
+ import json
34
+ import sys
35
+ from pathlib import Path
36
+
37
+ _DEFAULT_OUTPUT_DIR = Path("outputs")
38
+
39
+
40
+ # ---------------------------------------------------------------------------
41
+ # Shared argument building
42
+ # ---------------------------------------------------------------------------
43
+
44
+
45
def _add_common_args(parser: argparse.ArgumentParser) -> None:
    """Register the options shared by both validation commands on *parser*.

    Adds ``--side``, the numeric detection-tuning options and the
    ``--full-debug`` / ``--overwrite`` switches, in that order.

    Parameters
    ----------
    parser : argparse.ArgumentParser
        Parser to extend in place.
    """
    # (flag, type, default, metavar, help) for every numeric tuning option.
    tunables = (
        (
            "--strip-width",
            float,
            0.40,
            "FRAC",
            "Strip width as fraction of image width (max 0.40)",
        ),
        ("--threshold", float, 0.05, "Z", "Dissimilarity Z-score threshold"),
        ("--min-area", int, 500, "PX", "Minimum blob area in pixels"),
        (
            "--max-aspect-ratio",
            float,
            5.0,
            "R",
            "Reject blobs with bounding-box aspect ratio above this",
        ),
        (
            "--split-margin",
            int,
            50,
            "PX",
            "Extra pixels added beyond the outermost control bbox edge",
        ),
        ("--proximity", int, 50, "PX", "Proximity rescue radius in pixels"),
    )
    # (flag, help) for the boolean switches.
    switches = (
        (
            "--full-debug",
            "Show the full 5-panel debug grid instead of the simple split-x view",
        ),
        ("--overwrite", "Overwrite the output file if it already exists"),
    )

    parser.add_argument(
        "--side",
        choices=["left", "right"],
        default="left",
        help="Side where controls are placed",
    )
    for flag, caster, default, metavar, help_text in tunables:
        parser.add_argument(
            flag,
            type=caster,
            default=default,
            metavar=metavar,
            help=help_text,
        )
    for flag, help_text in switches:
        parser.add_argument(flag, action="store_true", help=help_text)
105
+
106
+
107
def _detection_kwargs(args: argparse.Namespace) -> dict:
    """Map parsed CLI options onto ``detect_controls_debug`` keyword names.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed arguments carrying the tuning options registered by
        ``_add_common_args``.

    Returns
    -------
    dict
        Keyword arguments ready to be splatted into the validation call.
    """
    # (detector keyword, attribute name on the parsed namespace)
    keyword_to_option = (
        ("strip_width_frac", "strip_width"),
        ("dissimilarity_threshold", "threshold"),
        ("min_tissue_area_px", "min_area"),
        ("max_aspect_ratio", "max_aspect_ratio"),
        ("control_split_x_margin", "split_margin"),
        ("control_proximity_px", "proximity"),
    )
    return {keyword: getattr(args, option) for keyword, option in keyword_to_option}
117
+
118
+
119
+ # ---------------------------------------------------------------------------
120
+ # Output helpers
121
+ # ---------------------------------------------------------------------------
122
+
123
+
124
def _resolve_output(output: Path | None, default_name: str) -> Path:
    """Pick the results file path.

    Parameters
    ----------
    output : Path or None
        Path given on the command line, or ``None`` when ``--output`` was
        omitted.
    default_name : str
        File name to use inside the default output directory.

    Returns
    -------
    Path
        *output* unchanged when given, otherwise
        ``_DEFAULT_OUTPUT_DIR / default_name``.
    """
    return _DEFAULT_OUTPUT_DIR / default_name if output is None else output
129
+
130
+
131
def _guard_overwrite(output: Path, overwrite: bool) -> None:
    """Refuse to continue when *output* would be clobbered.

    Parameters
    ----------
    output : Path
        Results file about to be written.
    overwrite : bool
        Whether replacing an existing file is allowed.

    Raises
    ------
    SystemExit
        With an explanatory message when *output* exists and *overwrite*
        is false.
    """
    if not output.exists() or overwrite:
        return
    sys.exit(f"Output file already exists: {output}\nUse --overwrite to replace it.")
135
+
136
+
137
def _write_results(
    results: dict[str, tuple[int | None, int]],
    output: Path,
) -> None:
    """Write the validation *results* to *output* as a JSON array.

    Each entry records the slide path, the validated ``control_split_x``,
    the thumbnail width and the split position as a percentage of that
    width (``"N/A"`` when there is no split or the width is not positive).
    Missing parent directories of *output* are created.

    Parameters
    ----------
    results : dict
        Maps a path to ``(control_split_x, thumbnail_width)``;
        ``control_split_x`` may be ``None``.
    output : Path
        Destination file.
    """
    records = []
    for slide_path, (split_x, width) in results.items():
        if split_x is not None and width > 0:
            share = f"{split_x / width * 100:.1f}%"
        else:
            share = "N/A"
        records.append(
            {
                "path": slide_path,
                "control_split_x": split_x,
                "thumbnail_width": width,
                "pct": share,
            }
        )

    output.parent.mkdir(parents=True, exist_ok=True)
    serialised = json.dumps(records, indent=2)
    output.write_text(serialised)
    print(f"Wrote {len(records)} result(s) to {output}")
154
+
155
+
156
+ # ---------------------------------------------------------------------------
157
+ # validate-thumbnails
158
+ # ---------------------------------------------------------------------------
159
+
160
+
161
def _glob_local(pattern: str) -> list[Path]:
    """Expand a local glob *pattern*, relative or absolute, into paths.

    ``Path.glob`` rejects patterns that start with a root or drive
    (``NotImplementedError: Non-relative patterns are unsupported``), so an
    absolute pattern is split into its anchor and the remainder, and the
    remainder is globbed from the anchor.  Relative patterns are globbed
    from the current directory exactly as written.

    Parameters
    ----------
    pattern : str
        Glob pattern such as ``'assets/*.png'`` or ``'/data/slides/*.mrxs'``.

    Returns
    -------
    list of Path
        Matching paths in the order the filesystem yields them (unsorted).
        Matches of an absolute pattern are absolute paths.
    """
    parsed = Path(pattern)
    if parsed.is_absolute():
        anchor = Path(parsed.anchor)
        return list(anchor.glob(str(parsed.relative_to(anchor))))
    return list(Path(".").glob(pattern))


def main_validate_thumbnails(argv: list[str] | None = None) -> None:
    """Entry point for ``rmcontrols-validate-thumbnails``.

    Resolves the output file, refuses to clobber it unless ``--overwrite``
    is given, expands the thumbnail glob, runs the interactive batch
    validation and writes the results as JSON.

    Parameters
    ----------
    argv : list of str, optional
        Argument vector without the program name.  ``None`` makes
        :mod:`argparse` read ``sys.argv[1:]``.

    Raises
    ------
    SystemExit
        When the output file exists without ``--overwrite`` or when the
        pattern matches no files.
    """
    parser = argparse.ArgumentParser(
        prog="rmcontrols-validate-thumbnails",
        description=(
            "Interactively validate control_split_x for a batch of thumbnail"
            " images matched by a glob pattern."
        ),
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "glob",
        help=(
            "Glob pattern for thumbnail images, e.g. 'assets/*.png'."
            " Quote the pattern to prevent shell expansion."
        ),
    )
    parser.add_argument(
        "--output",
        "-o",
        type=Path,
        default=None,
        metavar="PATH",
        help=(
            "Write results as JSON to this file. "
            f"Default: {_DEFAULT_OUTPUT_DIR}/validate_thumbnails.json"
        ),
    )
    _add_common_args(parser)
    args = parser.parse_args(argv)

    # Check the output path before the interactive session starts so the
    # user is not asked to validate a batch that cannot be saved.
    output = _resolve_output(args.output, "validate_thumbnails.json")
    _guard_overwrite(output, args.overwrite)

    paths = sorted(_glob_local(args.glob))
    if not paths:
        sys.exit(f"No files matched: {args.glob!r}")

    print(f"Found {len(paths)} thumbnail(s) matching {args.glob!r}")

    from ._validation import validate_control_split_x_batch

    results = validate_control_split_x_batch(
        paths,
        side=args.side,
        full_debug=args.full_debug,
        **_detection_kwargs(args),
    )
    _write_results(results, output)


# ---------------------------------------------------------------------------
# validate-slides
# ---------------------------------------------------------------------------


def main_validate_slides(argv: list[str] | None = None) -> None:
    """Entry point for ``rmcontrols-validate-slides``.

    Works like ``rmcontrols-validate-thumbnails`` but takes whole-slide
    images.  The positional argument may be a local glob, a single
    ``s3://`` URI, or an ``s3://`` glob (expanded with ``glob_s3``).

    Parameters
    ----------
    argv : list of str, optional
        Argument vector without the program name.  ``None`` makes
        :mod:`argparse` read ``sys.argv[1:]``.

    Raises
    ------
    SystemExit
        When the output file exists without ``--overwrite`` or when the
        pattern matches no local files / S3 objects.
    """
    parser = argparse.ArgumentParser(
        prog="rmcontrols-validate-slides",
        description=(
            "Interactively validate control_split_x for a batch of whole-slide"
            " images matched by a glob pattern.  Accepts local paths or"
            " s3:// URIs.  Requires openslide-python."
        ),
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "glob",
        help=(
            "Glob pattern for local WSI files (e.g. 'slides/*.mrxs') or an"
            " S3 URI / S3 glob pattern"
            " (e.g. 's3://bucket/slides/*.mrxs')."
            " Quote the pattern to prevent shell expansion."
        ),
    )
    parser.add_argument(
        "--thumbnail-size",
        type=int,
        default=1000,
        metavar="PX",
        help="Maximum dimension (width or height) of the extracted thumbnail",
    )
    parser.add_argument(
        "--output",
        "-o",
        type=Path,
        default=None,
        metavar="PATH",
        help=(
            "Write results as JSON to this file. "
            f"Default: {_DEFAULT_OUTPUT_DIR}/validate_slides.json"
        ),
    )
    _add_common_args(parser)
    args = parser.parse_args(argv)

    output = _resolve_output(args.output, "validate_slides.json")
    _guard_overwrite(output, args.overwrite)

    glob_str = args.glob
    if glob_str.startswith("s3://"):
        # Only URIs containing a glob metacharacter are expanded; a plain
        # URI is taken as a single slide.
        if any(c in glob_str for c in ("*", "?", "[")):
            from ._s3 import glob_s3

            paths = glob_s3(glob_str)
            if not paths:
                sys.exit(f"No S3 objects matched: {glob_str!r}")
        else:
            paths = [glob_str]
    else:
        paths = sorted(str(p) for p in _glob_local(glob_str))
        if not paths:
            sys.exit(f"No files matched: {glob_str!r}")

    print(f"Found {len(paths)} slide(s)")

    from ._validation import validate_control_split_x_wsi

    results = validate_control_split_x_wsi(
        paths,
        side=args.side,
        thumbnail_size=args.thumbnail_size,
        full_debug=args.full_debug,
        **_detection_kwargs(args),
    )
    _write_results(results, output)