pixel-patrol-loader-bio 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,14 @@
1
+ Metadata-Version: 2.4
2
+ Name: pixel-patrol-loader-bio
3
+ Version: 0.1.0
4
+ Summary: PixelPatrol add-on package for loading images with bioio and zarr
5
+ Requires-Python: >=3.12
6
+ Description-Content-Type: text/markdown
7
+ Requires-Dist: bioio>=3.0.0
8
+ Requires-Dist: bioio-tifffile>=1.3.0
9
+ Requires-Dist: bioio-ome-tiff>=1.3.0
10
+ Requires-Dist: bioio-ome-zarr>=2.2.0
11
+ Requires-Dist: bioio-imageio==1.3.0
12
+ Requires-Dist: pixel-patrol-base>=0.1.0
13
+ Requires-Dist: zarr>=3.1.1
14
+ Requires-Dist: tifffile==2025.9.9
@@ -0,0 +1,35 @@
1
+ [project]
2
+ name = "pixel-patrol-loader-bio"
3
+ version = "0.1.0"
4
+ description = "PixelPatrol add-on package for loading images with bioio and zarr"
5
+ readme = "README.md"
6
+ requires-python = ">=3.12"
7
+ dependencies = [
8
+ "bioio>=3.0.0",
9
+ "bioio-tifffile>=1.3.0",
10
+ "bioio-ome-tiff>=1.3.0",
11
+ "bioio-ome-zarr>=2.2.0",
12
+ "bioio-imageio==1.3.0",
13
+ "pixel-patrol-base>=0.1.0",
14
+ "zarr>=3.1.1",
15
+ "tifffile==2025.9.9",
16
+ ]
17
+
18
+ [tool.uv]
19
+ package = true
20
+
21
+ [tool.hatch.build.targets.wheel]
22
+ packages = ["src/pixel_patrol_loader_bio"]
23
+
24
+ [project.entry-points."pixel_patrol.loader_plugins"]
25
+ pixel_patrol_loader_bio_loader_builtins = "pixel_patrol_loader_bio.plugin_registry:register_loader_plugins"
26
+
27
+
28
+ [dependency-groups]
29
+ dev = [
30
+ "pytest>=8.3.5",
31
+ "pytest-mock>=3.14.1",
32
+ ]
33
+
34
+ [tool.uv.sources]
35
+ pixel-patrol-base = { path = "../pixel-patrol-base" }
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,2 @@
1
+ # TODO: Isn't this a problem - same config file for both loaders?
2
+ STANDARD_DIM_ORDER = "TCZYXS"
@@ -0,0 +1,8 @@
1
+ from pixel_patrol_loader_bio.plugins.loaders.bioio_loader import BioIoLoader
2
+ from pixel_patrol_loader_bio.plugins.loaders.zarr_loader import ZarrLoader
3
+
4
def register_loader_plugins():
    """Return the loader classes this package contributes.

    The list holds the classes themselves, not instances.
    """
    loader_classes = [BioIoLoader, ZarrLoader]
    return loader_classes
@@ -0,0 +1,29 @@
1
+ from pathlib import Path
2
+ import zarr
3
+
4
def is_zarr_store(path: Path) -> bool:
    """
    Report whether ``path`` is a Zarr store that should be processed.

    The path is opened read-only through ``zarr.open``. What comes back decides
    the answer:

    * a group qualifies only if it carries at least one attribute, so a bare
      container group is rejected;
    * anything else that opens successfully qualifies.

    Args:
        path: The pathlib.Path object to check.

    Returns:
        True for a processable store; False for an attribute-less group or when
        opening/inspecting the path raises any exception.
    """
    try:
        store_obj = zarr.open(store=str(path.absolute()), mode="r")

        if isinstance(store_obj, zarr.Group):
            # A generic container group has empty attrs and is not processable.
            return bool(store_obj.attrs)

        return True

    except Exception:
        # This is a probe: any failure means "not a usable Zarr store".
        return False
@@ -0,0 +1,111 @@
1
+ import logging
2
+ import math
3
+ from pathlib import Path
4
+ from typing import Any, Dict, List, Optional, Set
5
+
6
+ import bioio_imageio
7
+ import numpy as np
8
+ import polars as pl
9
+ from bioio import BioImage
10
+ from bioio_base.exceptions import UnsupportedFileFormatError
11
+
12
+ from pixel_patrol_base.core.record import record_from
13
+ from pixel_patrol_loader_bio.plugins.loaders._utils import is_zarr_store
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
+ def _extract_metadata(img: Any) -> Dict[str, Any]:
19
+ """
20
+ Extract metadata from a BioImage-like object into a flat dict.
21
+ """
22
+ metadata: Dict[str, Any] = {}
23
+
24
+ # Dim order and per-dimension sizes (e.g., X_size, Y_size, Z_size, C_size, T_size)
25
+ dim_order = getattr(getattr(img, 'dims', None), 'order', '')
26
+ metadata["dim_order"] = dim_order
27
+ for letter in dim_order:
28
+ dim_size= getattr(img.dims, letter, None)
29
+ if not dim_size:
30
+ dim_size = 1
31
+ metadata[f"{letter}_size"] = int(dim_size)
32
+
33
+ dim_names = getattr(getattr(img, 'dims', None), 'names', None)
34
+ if isinstance(dim_names, (list, tuple)) and all(isinstance(x, str) for x in dim_names):
35
+ metadata["dim_names"] = list(dim_names)
36
+
37
+ metadata["n_images"] = len(img.scenes) if hasattr(img, "scenes") else 1
38
+
39
+ if hasattr(img, "physical_pixel_sizes"):
40
+ for ax in ("X", "Y", "Z", "T"):
41
+ metadata[f"pixel_size_{ax}"] = getattr(img.physical_pixel_sizes, ax, None)
42
+
43
+ if hasattr(img, "channel_names"):
44
+ metadata["channel_names"] = img.channel_names
45
+
46
+ if hasattr(img, "dtype"):
47
+ metadata["dtype"] = str(img.dtype)
48
+
49
+ if hasattr(img, "shape"):
50
+ metadata["shape"] = np.array(img.shape)
51
+ metadata["ndim"] = len(img.shape)
52
+ metadata["num_pixels"] = math.prod(img.shape)
53
+
54
+ return metadata
55
+
56
+
57
def _load_bioio_image(file_path: Path) -> Optional[BioImage]:
    """
    Open ``file_path`` as a BioImage, retrying once with the imageio reader.

    The retry only happens when the first attempt raises
    UnsupportedFileFormatError. Every failure is logged as a warning and
    reported to the caller as None instead of being raised.
    """
    try:
        return BioImage(file_path)
    except UnsupportedFileFormatError:
        # Default attempt reported an unsupported format; try imageio below.
        pass
    except Exception as e:
        logger.warning(f"Could not load '{file_path}' with BioImage: {e}")
        return None

    try:
        return BioImage(file_path, reader=bioio_imageio.Reader)
    except Exception as e:
        logger.warning(f"Could not load '{file_path}' with BioImage (imageio fallback): {e}")
        return None
72
+
73
class BioIoLoader:
    """
    Loader that produces a Record from BioIO/BioImage.
    Protocol: single `load()` method returning a Record.
    """

    NAME = "bioio"

    # Extensions are listed without a leading dot.
    SUPPORTED_EXTENSIONS: Set[str] = {
        "czi",
        "tif",
        "tiff",
        "ome.tif",
        "nd2",
        "lif",
        "jpg",
        "jpeg",
        "png",
        "bmp",
        "ome.zarr",
    }

    # Fixed metadata keys and the type declared for each.
    OUTPUT_SCHEMA: Dict[str, Any] = {
        "dim_order": str,
        "dim_names": list,
        "n_images": int,
        "num_pixels": int,
        "shape": pl.Array,  # or use `list` if you prefer to avoid polars types here
        "ndim": int,
        "channel_names": list,  # could be list[str]
        "dtype": str,
    }

    # Regex-keyed schema entries for the per-axis keys (pixel_size_<A>, <A>_size).
    OUTPUT_SCHEMA_PATTERNS: List[tuple[str, Any]] = [
        (r"^pixel_size_[A-Za-z]$", float),
        (r"^[A-Za-z]_size$", int),
    ]

    FOLDER_EXTENSIONS: Set[str] = {"zarr", "ome.zarr"}

    def is_folder_supported(self, path: Path) -> bool:
        """Delegate the folder check to `is_zarr_store`."""
        return is_zarr_store(path)

    def load(self, source: str):
        """
        Load `source` and wrap its dask data and metadata in a Record.

        Raises UnsupportedFileFormatError when the image cannot be opened.
        """
        image = _load_bioio_image(Path(source))
        if image is None:
            raise UnsupportedFileFormatError(self.NAME, path=source)

        metadata = _extract_metadata(image)
        # dask-backed array; Record encapsulates axes/capabilities from meta["dim_order"]
        return record_from(image.dask_data, metadata, kind="intensity")
@@ -0,0 +1,198 @@
1
+ import logging
2
+ import string
3
+ from pathlib import Path
4
+ from typing import Any, Dict, Optional, Set, List, Mapping
5
+
6
+ import dask.array as da
7
+ import numpy as np
8
+ import zarr
9
+
10
+ from pixel_patrol_base.core.record import record_from
11
+ from pixel_patrol_loader_bio.plugins.loaders._utils import is_zarr_store
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
def _load_zarr_array(path: Path) -> Optional[da.Array]:
    """
    Open the Zarr store at ``path`` as a dask array.

    Strategies are tried in order; the first that succeeds wins:

    1. ``path`` itself as an array.
    2. Named components of the group at ``path``: the NGFF
       ``multiscales[0].datasets[*].path`` entries (looked up at the top level
       of the attributes, or under the ``"ome"`` key), then the conventional
       names ``"0"`` and ``"data"``.
    3. The group's only array, when it contains exactly one.
    4. ``zarr.open_array`` wrapped with ``dask.array.from_array``.

    Returns:
        The dask array, or None (after logging a warning) if nothing worked.
    """
    store = str(path)
    try:
        # 1) Direct array path.
        return da.from_zarr(store)
    except Exception as e1:
        try:
            root = zarr.open(store, mode="r")

            if isinstance(root, zarr.Group):
                attrs = dict(root.attrs)

                multiscales = attrs.get("multiscales")
                ome = attrs.get("ome")
                if multiscales is None and isinstance(ome, dict):
                    multiscales = ome.get("multiscales")

                # 2) NGFF dataset paths. Validate every level so malformed
                #    metadata (e.g. an empty list) does not abort the fallbacks.
                candidates: List[str] = []
                if isinstance(multiscales, list) and multiscales and isinstance(multiscales[0], dict):
                    for dataset in multiscales[0].get("datasets") or []:
                        component = dataset.get("path") if isinstance(dataset, dict) else None
                        if component:
                            candidates.append(component)

                # Common fallbacks
                candidates += ["0", "data"]

                # dict.fromkeys drops duplicates while keeping the priority order.
                for component in dict.fromkeys(candidates):
                    try:
                        return da.from_zarr(store, component=component)
                    except Exception:
                        continue

                # 3) Single-array group: use that array's name. Tried after the
                #    named candidates so the group is only listed when needed.
                arrays = list(root.arrays())
                if len(arrays) == 1:
                    try:
                        return da.from_zarr(store, component=arrays[0][0])
                    except Exception:
                        pass

            # 4) Last resort: open with zarr and wrap with dask
            arr = zarr.open_array(store, mode="r")
            return da.from_array(arr, chunks=arr.chunks)
        except Exception as e2:
            logger.warning(
                f"Could not load '{path}' as a Zarr array (tried as array/group): {e1}; {e2}"
            )
            return None
57
+
58
+
59
+ def _infer_dim_order(n: int) -> str:
60
+ """
61
+ Infer a simple dim order assuming the last two dims are YX.
62
+ Preceding dims are assigned A,B,C,... in order.
63
+ """
64
+ if n <= 2:
65
+ return "YX"[-n:] # n==0 -> "", n==1 -> "X", n==2 -> "YX"
66
+ return string.ascii_uppercase[: n - 2] + "YX"
67
+
68
+
69
def _read_zarr_root_and_primary_attrs(path: Path) -> Dict[str, Any]:
    """
    Read merged attributes from the Zarr root and one primary child array.

    Root attributes are collected first. Then the first array yielded by
    ``root.arrays()`` whose name is "data" or "0" contributes its attributes,
    overriding root keys of the same name. Failures are logged and never
    raised.

    Returns:
        A flat dict of attributes; empty if the store could not be read.
    """
    attrs: Dict[str, Any] = {}
    try:
        root = zarr.open(str(path), mode="r")
        attrs.update(dict(getattr(root, "attrs", {}) or {}))
    except Exception as e:
        logger.warning(f"Could not read Zarr attributes from '{path}': {e}")
        return attrs

    try:
        if hasattr(root, "arrays"):
            for name, item in root.arrays():
                if name in ("data", "0"):
                    attrs.update(dict(getattr(item, "attrs", {}) or {}))
                    break
    except Exception as e:
        # Child attributes are optional; keep the root attributes and leave a trace.
        logger.debug("Could not read child array attributes from '%s': %s", path, e)
    return attrs
89
+
90
+
91
+ def _extract_ngff_dim_names(attrs: Mapping[str, Any], ndim: int) -> Optional[List[str]]:
92
+ """
93
+ Parse OME-NGFF axes from attrs -> return dim_names if available
94
+ """
95
+ if not isinstance(attrs, dict):
96
+ return None
97
+
98
+ ms = attrs.get("multiscales")
99
+ if not (isinstance(ms, list) and ms):
100
+ return None
101
+
102
+ first = ms[0]
103
+ if not isinstance(first, dict):
104
+ return None
105
+
106
+ axes = first.get("axes")
107
+ if not isinstance(axes, list):
108
+ return None
109
+
110
+ if len(axes) != int(ndim):
111
+ logger.warning("NGFF: axes length (%s) != array ndim (%s); ignoring axes", len(axes), ndim)
112
+ return None
113
+
114
+ names: List[str] = []
115
+ for a in axes:
116
+ if isinstance(a, str):
117
+ n = a
118
+ elif isinstance(a, dict) and isinstance(a.get("name"), str):
119
+ n = a["name"]
120
+ else:
121
+ logger.warning("NGFF: malformed axis entry %r; ignoring axes", a)
122
+ return None
123
+ names.append(n)
124
+
125
+ return names
126
+
127
+
128
def _extract_zarr_metadata(arr: da.Array, path: Path) -> Dict[str, Any]:
    """
    Build the flat metadata dict for a Zarr-backed array.

    Store attributes are attached under "zarr_attributes" when non-empty.
    Axis names come from NGFF metadata when available; "dim_order" uses them
    only if every name is a single character (upper-cased), otherwise the
    order is inferred from the rank. Without NGFF names, "dim_names" falls
    back to "dim<letter>" per inferred axis.

    Args:
        arr: The loaded array.
        path: Location of the store the array was read from.

    Returns:
        Dict with dim_order, dim_names, shape, dtype, ndim, num_pixels,
        chunks, one "<axis>_size" per axis and optionally zarr_attributes.
    """
    # Function-scope import: needed only for the exact pixel count below.
    import math

    meta: Dict[str, Any] = {}
    ndim = int(arr.ndim)

    attrs = _read_zarr_root_and_primary_attrs(path)
    if attrs:
        meta["zarr_attributes"] = attrs

    dim_names = _extract_ngff_dim_names(attrs, ndim)

    if dim_names and all(isinstance(n, str) and len(n) == 1 for n in dim_names):
        dim_order = "".join(n.upper() for n in dim_names)
    else:
        dim_order = _infer_dim_order(ndim)

    if not dim_names:
        dim_names = [f"dim{c}" for c in dim_order]

    meta["dim_order"] = dim_order
    meta["dim_names"] = dim_names

    meta["shape"] = np.array(arr.shape, dtype=int)
    meta["dtype"] = str(arr.dtype)
    meta["ndim"] = arr.ndim
    # Python-int product: np.prod works in a fixed-width integer and can wrap
    # around for very large arrays.
    meta["num_pixels"] = math.prod(int(s) for s in arr.shape)
    chunks = getattr(arr, "chunksize", None)
    meta["chunks"] = chunks if chunks is not None else arr.chunks

    for i, ax in enumerate(dim_order):
        meta[f"{ax}_size"] = int(arr.shape[i])

    return meta
160
+
161
+
162
class ZarrLoader:
    """
    Loader that produces a Record from Zarr.
    Protocol: single `load()` returning a Record.
    """

    NAME = "zarr"

    # Extensions are listed without a leading dot.
    SUPPORTED_EXTENSIONS: Set[str] = {"zarr"}

    # Fixed metadata keys and the type declared for each.
    OUTPUT_SCHEMA: Dict[str, Any] = {
        "dim_order": str,
        "dim_names": list,
        "n_images": int,
        "channel_names": list,
        "dtype": str,
        "zarr_attributes": dict,
    }

    # Regex-keyed schema entry for the per-axis "<A>_size" keys.
    OUTPUT_SCHEMA_PATTERNS = [
        (r"^[A-Za-z]_size$", int),
    ]

    FOLDER_EXTENSIONS: Set[str] = {"zarr", "ome.zarr"}

    def is_folder_supported(self, path: Path) -> bool:
        """Delegate the folder check to `is_zarr_store`."""
        return is_zarr_store(path)

    def load(self, source: str):
        """
        Load the store at `source` and wrap the array and its metadata in a Record.

        Raises RuntimeError when no array can be read from `source`.
        """
        store_path = Path(source)

        array = _load_zarr_array(store_path)
        if array is None:
            raise RuntimeError(f"Cannot read Zarr array at: {source}")

        metadata = _extract_zarr_metadata(array, store_path)
        return record_from(array, metadata, kind="intensity")
@@ -0,0 +1,14 @@
1
+ Metadata-Version: 2.4
2
+ Name: pixel-patrol-loader-bio
3
+ Version: 0.1.0
4
+ Summary: PixelPatrol add-on package for loading images with bioio and zarr
5
+ Requires-Python: >=3.12
6
+ Description-Content-Type: text/markdown
7
+ Requires-Dist: bioio>=3.0.0
8
+ Requires-Dist: bioio-tifffile>=1.3.0
9
+ Requires-Dist: bioio-ome-tiff>=1.3.0
10
+ Requires-Dist: bioio-ome-zarr>=2.2.0
11
+ Requires-Dist: bioio-imageio==1.3.0
12
+ Requires-Dist: pixel-patrol-base>=0.1.0
13
+ Requires-Dist: zarr>=3.1.1
14
+ Requires-Dist: tifffile==2025.9.9
@@ -0,0 +1,16 @@
1
+ pyproject.toml
2
+ src/pixel_patrol_loader_bio/config.py
3
+ src/pixel_patrol_loader_bio/plugin_registry.py
4
+ src/pixel_patrol_loader_bio.egg-info/PKG-INFO
5
+ src/pixel_patrol_loader_bio.egg-info/SOURCES.txt
6
+ src/pixel_patrol_loader_bio.egg-info/dependency_links.txt
7
+ src/pixel_patrol_loader_bio.egg-info/entry_points.txt
8
+ src/pixel_patrol_loader_bio.egg-info/requires.txt
9
+ src/pixel_patrol_loader_bio.egg-info/top_level.txt
10
+ src/pixel_patrol_loader_bio/plugins/loaders/_utils.py
11
+ src/pixel_patrol_loader_bio/plugins/loaders/bioio_loader.py
12
+ src/pixel_patrol_loader_bio/plugins/loaders/zarr_loader.py
13
+ tests/test_bioio_import.py
14
+ tests/test_project_settings.py
15
+ tests/test_records_df.py
16
+ tests/test_zarr_image.py
@@ -0,0 +1,2 @@
1
+ [pixel_patrol.loader_plugins]
2
+ pixel_patrol_loader_bio_loader_builtins = pixel_patrol_loader_bio.plugin_registry:register_loader_plugins
@@ -0,0 +1,8 @@
1
+ bioio>=3.0.0
2
+ bioio-tifffile>=1.3.0
3
+ bioio-ome-tiff>=1.3.0
4
+ bioio-ome-zarr>=2.2.0
5
+ bioio-imageio==1.3.0
6
+ pixel-patrol-base>=0.1.0
7
+ zarr>=3.1.1
8
+ tifffile==2025.9.9
@@ -0,0 +1,104 @@
1
+ from pathlib import Path
2
+ from typing import Dict, Any
3
+
4
+ import numpy as np
5
+ import pytest
6
+
7
+ from pixel_patrol_loader_bio.config import STANDARD_DIM_ORDER
8
+ from pixel_patrol_loader_bio.plugins.loaders.bioio_loader import BioIoLoader
9
+ from pixel_patrol_base.config import SPRITE_SIZE
10
+ from pixel_patrol_base.core.processing import get_all_record_properties
11
+ from pixel_patrol_base.plugin_registry import discover_processor_plugins
12
+
13
+
14
@pytest.fixture(scope="module")
def loader():
    """Module-scoped BioIoLoader instance shared by the tests in this file."""
    bioio_loader = BioIoLoader()
    return bioio_loader
17
+
18
@pytest.fixture(scope="module")
def processors():
    """Module-scoped result of discover_processor_plugins()."""
    discovered = discover_processor_plugins()
    return discovered
21
+
22
@pytest.fixture(scope="module")
def standard_dim_order():
    """Expose the package's STANDARD_DIM_ORDER constant as a fixture."""
    dim_order = STANDARD_DIM_ORDER
    return dim_order
25
+
26
+
27
def get_image_files_from_data_dir(test_data_dir: Path):
    """Recursively collect every file under test_data_dir except "not_an_image.txt"."""
    image_files = []
    for entry in test_data_dir.rglob("*"):
        if not entry.is_file():
            continue
        if entry.name == "not_an_image.txt":
            continue
        image_files.append(entry)
    return image_files
31
+
32
+
33
def test_nonexistent_path_raises(tmp_path, loader, processors):
    """A path that does not exist yields an empty properties dict.

    Despite the test name, no exception is expected here.
    """
    missing = tmp_path / "nope.tiny_png"
    properties = get_all_record_properties(missing, loader=loader, processors=processors)
    assert properties == {}
36
+
37
+
38
def test_unsupported_file(test_data_dir: Path, loader, processors):
    """The text file in the test data directory yields an empty properties dict."""
    properties = get_all_record_properties(
        test_data_dir / "not_an_image.txt",
        loader=loader,
        processors=processors,
    )
    assert properties == {}, f"Expected empty dict for non-image file, got {properties}"
42
+
43
+
44
def test_empty_or_corrupt_image(tmp_path, loader, processors):
    """A zero-byte file with a .tif name yields an empty properties dict."""
    empty_tif = tmp_path / "zero.tif"
    empty_tif.write_bytes(b"")
    properties = get_all_record_properties(empty_tif, loader=loader, processors=processors)
    assert properties == {}
48
+
49
+
50
def test_bioio_image_properties_per_file(
    image_file_path: Path,
    loader, processors,
    standard_dim_order: str,
    expected_image_data: Dict[str, Dict[str, Any]]
):
    """
    Check the properties extracted for one image file.

    Core keys (shape, dtype, ndim) are required for every file. Files listed
    in `expected_image_data` are additionally checked against their expected
    dtype substring and minimum ndim.
    """
    file_name = image_file_path.name
    actual_properties = get_all_record_properties(image_file_path, loader=loader, processors=processors)

    assert actual_properties is not None, f"Failed to get properties for {file_name}"
    assert actual_properties != {}, f"Properties dictionary is empty for {file_name}"

    # Core keys every file must provide.
    assert "shape" in actual_properties, f"Missing 'shape' for {file_name}"
    assert "dtype" in actual_properties, f"Missing 'dtype' for {file_name}"
    assert "ndim" in actual_properties, f"Missing 'ndim' for {file_name}"

    # File-specific expectations; a missing or empty entry means nothing more to check.
    expected = expected_image_data.get(file_name) or {}
    if "dtype" in expected:
        assert expected["dtype"] in str(actual_properties["dtype"]), \
            f"Dtype mismatch for {file_name}: expected contains {expected['dtype']}, got {actual_properties['dtype']}"
    if "min_ndim" in expected:
        assert actual_properties["ndim"] >= expected["min_ndim"], \
            f"ndim too small for {file_name}: expected >= {expected['min_ndim']}, got {actual_properties['ndim']}"
80
+
81
+
82
def test_all_image_files_load_and_standardize(
    image_file_path: Path,
    loader, processors,
    standard_dim_order: str
):
    """
    Every image file must yield dim_order, a square SPRITE_SIZE numpy
    thumbnail, and the core shape/dtype/ndim keys.
    """
    file_name = image_file_path.name
    properties = get_all_record_properties(image_file_path, loader=loader, processors=processors)

    assert properties is not None, f"Failed to get properties for {file_name}"
    assert properties != {}, f"Properties dictionary is empty for {file_name}"

    assert "dim_order" in properties, f"Missing dim_order for {file_name}"
    assert "thumbnail" in properties, f"Missing thumbnail for {file_name}"

    thumbnail = properties["thumbnail"]
    assert isinstance(thumbnail, np.ndarray), f"Thumbnail not a numpy array for {file_name}"
    assert thumbnail.shape == (SPRITE_SIZE, SPRITE_SIZE), \
        f"Thumbnail size mismatch for {file_name}: Expected ({SPRITE_SIZE}, {SPRITE_SIZE}), Got {properties['thumbnail'].shape}"

    assert "shape" in properties, f"Missing shape for {file_name}"
    assert "dtype" in properties, f"Missing dtype for {file_name}"
    assert "ndim" in properties, f"Missing ndim for {file_name}"
@@ -0,0 +1,211 @@
1
+ import pytest
2
+ import logging
3
+ from pathlib import Path
4
+
5
+ from pixel_patrol_loader_bio.plugins.loaders.bioio_loader import BioIoLoader
6
+ from pixel_patrol_base.core.project import Project
7
+ from pixel_patrol_base.core.project_settings import Settings
8
+ from pixel_patrol_base import api
9
+
10
+ logging.basicConfig(level=logging.INFO)
11
+
12
+
13
@pytest.fixture
def named_project_with_base_dir(tmp_path: Path) -> Project:
    """Create "TestProject" rooted at tmp_path, using the "bioio" loader."""
    project = api.create_project("TestProject", tmp_path, loader="bioio")
    return project
17
+
18
+
19
def test_get_settings_initial(named_project_with_base_dir: Project):
    """A newly created project reports the default settings values."""
    defaults = api.get_settings(named_project_with_base_dir)
    assert isinstance(defaults, Settings)
    assert defaults.cmap == "rainbow"
    assert defaults.n_example_files == 9
    assert defaults.selected_file_extensions == set()
26
+
27
+
28
def test_set_settings_valid(named_project_with_base_dir: Project):
    """Valid settings applied to a project are returned unchanged by get_settings."""
    requested = Settings(cmap="viridis", n_example_files=5, selected_file_extensions={"jpg", "png"})
    project = api.set_settings(named_project_with_base_dir, requested)
    stored = api.get_settings(project)
    assert stored.cmap == "viridis"
    assert stored.n_example_files == 5
    assert stored.selected_file_extensions == {"jpg", "png"}
36
+
37
+
38
def test_set_settings_invalid_cmap(named_project_with_base_dir: Project):
    """An unknown colormap name is rejected with a ValueError."""
    bad_cmap_settings = Settings(cmap="non_existent_cmap")
    with pytest.raises(ValueError, match="Invalid colormap name"):
        api.set_settings(named_project_with_base_dir, bad_cmap_settings)
43
+
44
+
45
def test_set_settings_invalid_n_example_images(named_project_with_base_dir: Project):
    """n_example_files values that are too low, too high, or non-integer are rejected."""
    # In order: below the range, above the range, not an integer.
    for bad_value in (0, 20, 9.5):
        invalid_settings = Settings(n_example_files=bad_value)
        with pytest.raises(ValueError, match="Number of example files must be an integer between 1 and 19"):
            api.set_settings(named_project_with_base_dir, invalid_settings)
58
+
59
+
60
def test_set_settings_set_selected_file_extensions_empty_initially(named_project_with_base_dir: Project, caplog):
    """An empty extension set is accepted on first use and a warning is logged."""
    empty_selection = Settings(selected_file_extensions=set())
    with caplog.at_level(logging.WARNING):
        project = api.set_settings(named_project_with_base_dir, empty_selection)

    assert api.get_settings(project).selected_file_extensions == set()
    assert "selected_file_extensions is an empty set - no file will be processed" in caplog.text
67
+
68
+
69
def test_set_settings_set_selected_file_extensions_with_unsupported(named_project_with_base_dir: Project, caplog):
    """Unsupported extensions are dropped from a mixed set and reported in the log."""
    requested = {"jpg", "xyz", "tiff"}  # jpg, tiff are supported, xyz is not
    kept = {"jpg", "tiff"}
    with caplog.at_level(logging.WARNING):
        settings = Settings(selected_file_extensions=requested)
        project = api.set_settings(named_project_with_base_dir, settings)

    assert api.get_settings(project).selected_file_extensions == kept
    assert "The following file extensions are not supported and will be ignored: xyz." in caplog.text
78
+
79
+
80
def test_set_settings_set_selected_file_extensions_only_unsupported(named_project_with_base_dir: Project, caplog):
    """A set of only unsupported extensions collapses to an empty selection."""
    requested = {"xyz", "abc"}
    with caplog.at_level(logging.WARNING):
        settings = Settings(selected_file_extensions=requested)
        project = api.set_settings(named_project_with_base_dir, settings)

    assert api.get_settings(project).selected_file_extensions == set()
    log_text = caplog.text
    assert "The following file extensions are not supported and will be ignored:" in log_text
    assert "abc" in log_text
    assert "xyz" in log_text
    assert "No loader supported file extensions provided. No files will be processed." in log_text
91
+
92
def test_set_settings_set_selected_file_extensions_to_all(named_project_with_base_dir: Project, caplog):
    """The string 'all' resolves to the loader's SUPPORTED_EXTENSIONS."""
    select_all = Settings(selected_file_extensions="all")
    with caplog.at_level(logging.INFO):
        project = api.set_settings(named_project_with_base_dir, select_all)

    assert api.get_settings(project).selected_file_extensions == BioIoLoader.SUPPORTED_EXTENSIONS
    assert "Using loader-supported extensions:" in caplog.text
99
+
100
+
101
def test_set_settings_invalid_string_for_extensions(named_project_with_base_dir: Project, caplog):
    """A string other than 'all' is rejected with a TypeError and an error log entry."""
    bad_settings = Settings(selected_file_extensions="invalid_string")
    with pytest.raises(TypeError, match=r"selected_file_extensions must be 'all' or a Set\[str\]\."):
        with caplog.at_level(logging.ERROR):
            api.set_settings(named_project_with_base_dir, bad_settings)

    # Checked after the raises block: a statement following the raising call
    # inside that block would never execute.
    assert "Invalid type for selected_file_extensions: <class 'str'>." in caplog.text
108
+
109
+
110
def test_set_settings_invalid_type_for_extensions(named_project_with_base_dir: Project, caplog):
    """A list instead of a set is rejected with a TypeError and an error log entry."""
    bad_settings = Settings(selected_file_extensions=["jpg", "png"])  # List instead of Set
    with pytest.raises(TypeError, match=r"selected_file_extensions must be 'all' or a Set\[str\]\."):
        with caplog.at_level(logging.ERROR):
            api.set_settings(named_project_with_base_dir, bad_settings)

    # Checked after the raises block: a statement following the raising call
    # inside that block would never execute.
    assert "Invalid type for selected_file_extensions: <class 'list'>." in caplog.text
117
+
118
+
119
def test_set_settings_change_selected_file_extensions_after_initial_set_different_set(
        named_project_with_base_dir: Project, caplog):
    """Once extensions are set, a later different set is ignored and the first value kept."""
    first = Settings(selected_file_extensions={"jpg"})
    project = api.set_settings(named_project_with_base_dir, first)
    assert api.get_settings(project).selected_file_extensions == {"jpg"}

    second = Settings(selected_file_extensions={"png"})

    with caplog.at_level(logging.INFO):
        project_after = api.set_settings(project, second)
    assert "selected_file_extensions already set; keeping existing value:" in caplog.text

    assert api.get_settings(project_after).selected_file_extensions == {"jpg"}
132
+
133
+
134
def test_set_settings_change_selected_file_extensions_after_initial_set_to_empty(named_project_with_base_dir: Project,
                                                                                 caplog):
    """Once extensions are set, a later empty set is ignored and the first value kept."""
    first = Settings(selected_file_extensions={"jpg"})
    project = api.set_settings(named_project_with_base_dir, first)
    assert api.get_settings(project).selected_file_extensions == {"jpg"}

    second = Settings(selected_file_extensions=set())

    with caplog.at_level(logging.INFO):
        project_after = api.set_settings(project, second)
    assert "selected_file_extensions already set; keeping existing value:" in caplog.text

    assert api.get_settings(project_after).selected_file_extensions == {"jpg"}
147
+
148
+
149
def test_set_settings_change_selected_file_extensions_from_all_to_set(named_project_with_base_dir: Project, caplog):
    """After 'all' has been applied, applying an explicit set must not raise.

    Only the state after the first call is asserted; the second call is
    exercised without checking its result.
    """
    first = Settings(selected_file_extensions="all")
    project = api.set_settings(named_project_with_base_dir, first)
    assert api.get_settings(project).selected_file_extensions == BioIoLoader.SUPPORTED_EXTENSIONS

    second = Settings(selected_file_extensions={"jpg"})

    with caplog.at_level(logging.INFO):
        api.set_settings(project, second)
    # INFO message asserted in other tests; keeping behavior consistent.
159
+
160
+
161
def test_set_settings_change_selected_file_extensions_from_set_to_all(named_project_with_base_dir: Project, caplog):
    """Once an explicit set is stored, a later 'all' is ignored and the set kept."""
    first = Settings(selected_file_extensions={"jpg"})
    project = api.set_settings(named_project_with_base_dir, first)
    assert api.get_settings(project).selected_file_extensions == {"jpg"}

    second = Settings(selected_file_extensions="all")

    with caplog.at_level(logging.INFO):
        project_after = api.set_settings(project, second)
    assert "selected_file_extensions already set; keeping existing value:" in caplog.text

    assert api.get_settings(project_after).selected_file_extensions == {"jpg"}
173
+
174
+
175
def test_set_settings_set_selected_file_extensions_to_same_set_already_defined(named_project_with_base_dir: Project,
                                                                               caplog):
    """Re-applying the identical set keeps the value and logs the 'already set' message."""
    first = Settings(selected_file_extensions={"jpg"})
    project = api.set_settings(named_project_with_base_dir, first)
    assert api.get_settings(project).selected_file_extensions == {"jpg"}

    repeated = Settings(selected_file_extensions={"jpg"})
    with caplog.at_level(logging.INFO):
        project_after = api.set_settings(project, repeated)

    assert api.get_settings(project_after).selected_file_extensions == {"jpg"}
    assert "selected_file_extensions already set; keeping existing value:" in caplog.text
186
+
187
+
188
def test_set_settings_set_selected_file_extensions_to_all_when_already_default_set(named_project_with_base_dir: Project,
                                                                                   caplog):
    """With the full supported set already stored, a later 'all' keeps it and logs 'already set'."""
    first = Settings(selected_file_extensions=BioIoLoader.SUPPORTED_EXTENSIONS)
    project = api.set_settings(named_project_with_base_dir, first)
    assert api.get_settings(project).selected_file_extensions == BioIoLoader.SUPPORTED_EXTENSIONS

    select_all = Settings(selected_file_extensions="all")
    with caplog.at_level(logging.INFO):
        project_after = api.set_settings(project, select_all)

    assert api.get_settings(project_after).selected_file_extensions == BioIoLoader.SUPPORTED_EXTENSIONS
    assert "selected_file_extensions already set; keeping existing value:" in caplog.text
199
+
200
+
201
def test_set_settings_set_selected_file_extensions_to_default_set_when_already_all_string(
        named_project_with_base_dir: Project, caplog):
    """After 'all', applying the equivalent explicit set keeps the value and logs 'already set'."""
    first = Settings(selected_file_extensions="all")
    project = api.set_settings(named_project_with_base_dir, first)
    assert api.get_settings(project).selected_file_extensions == BioIoLoader.SUPPORTED_EXTENSIONS

    explicit_default = Settings(selected_file_extensions=BioIoLoader.SUPPORTED_EXTENSIONS)
    with caplog.at_level(logging.INFO):
        project_after = api.set_settings(project, explicit_default)

    assert api.get_settings(project_after).selected_file_extensions == BioIoLoader.SUPPORTED_EXTENSIONS
    assert "selected_file_extensions already set; keeping existing value:" in caplog.text
@@ -0,0 +1,392 @@
1
+ from datetime import datetime
2
+ from pathlib import Path
3
+ from unittest.mock import patch
4
+
5
+ import numpy as np
6
+ import polars as pl
7
+ import pytest
8
+ import tifffile
9
+ from PIL import Image
10
+
11
+ from pixel_patrol_loader_bio.config import STANDARD_DIM_ORDER
12
+ from pixel_patrol_loader_bio.plugins.loaders.bioio_loader import BioIoLoader
13
+ from pixel_patrol_base.core import processing
14
+ from pixel_patrol_base.core.processing import (
15
+ build_records_df,
16
+ _scan_dirs_for_extensions,
17
+ _build_deep_record_df,
18
+ PATHS_DF_EXPECTED_SCHEMA,
19
+ )
20
+ from pixel_patrol_base.plugin_registry import discover_processor_plugins
21
+ from pixel_patrol_base.utils.df_utils import postprocess_basic_file_metadata_df
22
+
23
@pytest.fixture
def loader():
    """Return a newly constructed BioIoLoader for the requesting test."""
    fresh_loader = BioIoLoader()
    return fresh_loader
27
+
28
@pytest.fixture
def processors():
    """Return the result of a fresh processor-plugin discovery for the requesting test."""
    discovered = discover_processor_plugins()
    return discovered
32
+
33
+
34
def test_build_records_df_from_file_system_no_images(tmp_path):
    """A directory without matching files yields ``None`` and never builds deep records."""
    non_image_dir = tmp_path / "non_image_files"
    non_image_dir.mkdir()
    for file_name, content in (
        ("document.txt", "This is a text file."),
        ("data.csv", "1,2,3\n4,5,6"),
    ):
        (non_image_dir / file_name).write_text(content)

    # Patch the deep-record builder so the test can verify it is never reached.
    deep_builder_target = 'pixel_patrol_base.core.processing._build_deep_record_df'
    with patch(deep_builder_target, return_value=pl.DataFrame()) as mock_get_deep_records_df:
        records_df = build_records_df([non_image_dir], {"png", "jpg"}, "bioio")
        assert records_df is None
        mock_get_deep_records_df.assert_not_called()
47
+
48
+
49
def test_scan_dirs_for_extensions_filters_correct_extensions(tmp_path):
    """Only files with a requested extension are returned, each paired with its base directory.

    ``d.JPG`` is expected in the result for the requested ``"jpg"`` extension, while
    ``c.txt`` is expected to be left out.
    """
    dir1 = tmp_path / "dir1"
    dir1.mkdir()
    dir2 = tmp_path / "dir2"
    dir2.mkdir()

    for folder, file_name in (
        (dir1, "a.jpg"),
        (dir1, "b.png"),
        (dir1, "c.txt"),
        (dir2, "d.JPG"),
    ):
        (folder / file_name).write_bytes(b"")

    found = _scan_dirs_for_extensions([dir1, dir2], {"jpg", "png"})

    assert set(found) == {
        (dir1 / "a.jpg", dir1),
        (dir1 / "b.png", dir1),
        (dir2 / "d.JPG", dir2),
    }
63
+
64
def test_scan_dirs_for_extensions_handles_empty_directory_list():
    """Scanning an empty list of directories returns an empty list."""
    no_directories = []
    assert _scan_dirs_for_extensions(no_directories, {"jpg", "png"}) == []
67
+
68
+
69
def test_build_deep_record_df_returns_dataframe_with_required_columns(tmp_path, monkeypatch, loader):
    """The deep-record frame has one row per path plus the stubbed property columns."""
    image_paths = []
    for file_name in ("img1.jpg", "img2.png"):
        image_path = tmp_path / file_name
        image_path.write_bytes(b"")
        image_paths.append(image_path)

    def stub_record_properties(_path, loader, processors):
        # The loader handed to the property extractor must be the bioio loader.
        assert loader.NAME == "bioio"
        return {"width": 100, "height": 200}

    monkeypatch.setattr(
        "pixel_patrol_base.core.processing.get_all_record_properties",
        stub_record_properties,
    )

    df = _build_deep_record_df(image_paths, loader)

    assert isinstance(df, pl.DataFrame)
    assert set(df.columns) == {"path", "width", "height"}
    assert df.height == 2
    assert df["width"].to_list() == [100, 100]
    assert df["height"].to_list() == [200, 200]
88
+
89
+
90
def test_get_all_image_properties_returns_empty_for_nonexistent_file(tmp_path, loader, processors):
    """A path that was never created produces an empty property dict."""
    missing = tmp_path / "no.png"
    properties = processing.get_all_record_properties(missing, loader=loader, processors=processors)
    assert properties == {}
93
+
94
+
95
def test_get_all_image_properties_returns_empty_if_loading_fails(tmp_path, monkeypatch, loader, processors):
    """When the bioio image loader returns ``None`` the property dict is empty."""
    img_file = tmp_path / "img.jpg"
    img_file.write_bytes(b"not really an image")

    # Replace the low-level loader so it returns None for this file.
    monkeypatch.setattr(
        "pixel_patrol_loader_bio.plugins.loaders.bioio_loader._load_bioio_image",
        lambda p: None,
    )

    properties = processing.get_all_record_properties(img_file, loader=loader, processors=processors)
    assert properties == {}
100
+
101
class DummyImg:
    """Stand-in image object returned by the patched ``_load_bioio_image`` in these tests.

    All data lives in class attributes: a dims descriptor, a shape, a zero-filled array,
    channel names, metadata and physical pixel sizes, plus small accessor methods.
    """

    # Stand-in for the 'dims' object required by bioio_loader: exposes only `order`.
    dims = type("D", (), {"order": STANDARD_DIM_ORDER})()

    # Two pixels along Y and X, a single element along every other standard dimension.
    shape = tuple(1 if axis not in ("Y", "X") else 2 for axis in STANDARD_DIM_ORDER)

    # Must be a real NumPy array with the correct shape/ndim, not None;
    # this prevents _validate_and_fix_meta from incorrectly setting ndim to 0.
    dask_data = np.zeros(shape, dtype=np.uint8)

    # Other metadata properties used by the loader.
    channel_names = ["ch1", "ch2"]
    ome_metadata = {}

    physical_pixel_sizes = type("P", (), {"X": 0.5, "Y": 0.75, "Z": 1.0, "T": 1.0})()

    def get_image_data(self):
        """Return the backing data as a NumPy array."""
        return np.asarray(self.dask_data)

    def get_channel_names(self):
        """Return the channel name list."""
        return self.channel_names

    def get_physical_pixel_sizes(self):
        """Return the physical pixel sizes as a dict keyed by 'X', 'Y', 'Z' and 'T'."""
        sizes = self.physical_pixel_sizes
        return {axis: getattr(sizes, axis) for axis in ("X", "Y", "Z", "T")}

    def get_image_metadata(self):
        """Return the (empty) OME metadata mapping."""
        return self.ome_metadata
140
+
141
+
142
def test_get_all_image_properties_extracts_standard_and_requested_metadata(tmp_path, monkeypatch, loader, processors):
    """Shape, ndim, X pixel size and channel names are read from the patched-in ``DummyImg``."""
    img_file = tmp_path / "img.png"
    img_file.write_bytes(b"")

    # Every load attempt yields a DummyImg instead of touching the (empty) file.
    monkeypatch.setattr(
        "pixel_patrol_loader_bio.plugins.loaders.bioio_loader._load_bioio_image",
        lambda p: DummyImg(),
    )

    props = processing.get_all_record_properties(img_file, loader=loader, processors=[])

    expected_shape = [1 if axis not in ("Y", "X") else 2 for axis in STANDARD_DIM_ORDER]
    assert props["shape"] == expected_shape
    assert props["ndim"] == len(STANDARD_DIM_ORDER)

    assert props["pixel_size_X"] == 0.5
    assert props["channel_names"] == ["ch1", "ch2"]
162
+
163
+
164
def test_get_deep_image_df_ignores_paths_with_no_metadata(tmp_path, monkeypatch, loader, processors):
    """Paths whose property extraction returns an empty dict are left out of the deep frame."""
    p_valid = tmp_path / "valid.jpg"
    p_valid.write_bytes(b"")
    p_invalid = tmp_path / "invalid.png"
    p_invalid.write_bytes(b"")

    def stub_properties(path, _loader, _processors):
        # Only the "valid" file reports metadata; every other path reports none.
        if path == p_valid:
            return {"width": 10, "height": 20}
        return {}

    monkeypatch.setattr(
        "pixel_patrol_base.core.processing.get_all_record_properties",
        stub_properties,
    )

    df = _build_deep_record_df([p_valid, p_invalid], loader_instance=loader)

    assert isinstance(df, pl.DataFrame)
    assert df.height == 1
    assert df["path"].to_list() == [str(p_valid)]
    assert df["width"].to_list() == [10]
    assert df["height"].to_list() == [20]
183
+
184
+
185
def test_build_records_df_from_file_system_with_images_returns_expected_columns_and_values(tmp_path, monkeypatch):
    """The records frame holds the schema columns plus the stubbed width/height per image path."""
    base = tmp_path / "root"
    base.mkdir()

    img1 = base / "graphic.png"
    img2 = base / "photo1.jpg"
    for image_path in (img1, img2):
        image_path.write_text("dummy")
    (base / "notes.txt").write_text("not an image")

    expected_paths = [str(img1), str(img2)]
    widths = [64, 128]
    heights = [48, 256]

    # Canned deep metadata returned in place of the real deep-record builder.
    deep_df = pl.DataFrame({
        "path": expected_paths,
        "width": widths,
        "height": heights,
    })
    monkeypatch.setattr(
        "pixel_patrol_base.core.processing._build_deep_record_df",
        lambda paths, cols: deep_df,
    )

    result = build_records_df(bases=[base], selected_extensions={"jpg", "png"}, loader="bioio")

    assert result is not None

    expected_cols = set(PATHS_DF_EXPECTED_SCHEMA.keys()) | {"width", "height"}
    assert expected_cols.issubset(set(result.columns))

    assert set(result["path"].to_list()) == set(expected_paths)

    result_dict = {}
    for row in result.iter_rows(named=True):
        result_dict[row["path"]] = (row["width"], row["height"])
    expected_dict = {
        path: (width, height)
        for path, width, height in zip(expected_paths, widths, heights)
    }
    assert result_dict == expected_dict
223
+
224
def test_build_records_df_from_file_system_merges_basic_and_deep_metadata_correctly(tmp_path, monkeypatch):
    """Basic file metadata and the stubbed deep metadata end up in one frame, row-aligned by path."""
    base = tmp_path / "root"
    base.mkdir()
    img1 = base / "one.jpg"
    img1.write_text("x")
    img2 = base / "two.png"
    img2.write_text("y")

    # Canned deep metadata returned in place of the real deep-record builder.
    deep_df = pl.DataFrame({
        "path": [str(img1), str(img2)],
        "width": [10, 20],
        "height": [15, 25],
    })
    monkeypatch.setattr(
        "pixel_patrol_base.core.processing._build_deep_record_df",
        lambda paths, cols: deep_df,
    )

    result = build_records_df(
        bases=[base],
        selected_extensions={"jpg", "png"},
        loader="bioio",
    )

    # build_records_df returns None when nothing matches; fail with a clear
    # assertion here instead of an AttributeError on `result.columns` below.
    assert result is not None

    expected_cols = set(PATHS_DF_EXPECTED_SCHEMA.keys()) | {"width", "height"}
    assert expected_cols.issubset(set(result.columns))

    # Sort by path so the row order is deterministic before comparing columns.
    df = result.sort("path")
    assert df["path"].to_list() == [str(img1), str(img2)]
    assert df["width"].to_list() == [10, 20]
    assert df["height"].to_list() == [15, 25]
254
+
255
+
256
def test_postprocess_basic_file_metadata_df_adds_modification_month_and_imported_path_short(tmp_path):
    """Post-processing adds the month and short imported-path columns and fills ``size_readable``.

    ``imported_path_short`` is accepted either as the full imported path or as its last
    path component.
    """
    # `Path` is already imported at module level, and `tmp_path` is already a
    # pathlib.Path, so no local import or re-wrapping is needed.
    base = tmp_path
    sub_dir = base / "sub"

    df = pl.DataFrame({
        "path": [str(sub_dir / "a.txt"), str(sub_dir / "b.txt")],
        "name": ["a.txt", "b.txt"],
        "type": ["file", "file"],
        "parent": [str(sub_dir), str(sub_dir)],
        "depth": [2, 2],
        "size_bytes": [1024, 2048],
        "modification_date": [
            datetime(2025, 3, 15, 12, 0),
            datetime(2025, 7, 1, 9, 30),
        ],
        "file_extension": ["txt", "txt"],
        "size_readable": ["", ""],
        "imported_path": [str(base), str(base)],
    })

    out = postprocess_basic_file_metadata_df(df)

    assert set(PATHS_DF_EXPECTED_SCHEMA.keys()).issubset(set(out.columns))
    assert out["modification_month"].to_list() == [3, 7]

    actual_short = out["imported_path_short"].to_list()
    expected_full = [str(base), str(base)]
    expected_last = [base.name, base.name]
    assert actual_short == expected_full or actual_short == expected_last

    assert out["size_readable"].to_list() == ["1.0 KB", "2.0 KB"]
284
+
285
def test_full_records_df_computes_real_mean_intensity(tmp_path, loader):
    """Real 2x2 grayscale PNGs filled with 0 and 255 report mean intensities 0.0 and 255.0."""
    img_dir = tmp_path / "imgs"
    img_dir.mkdir()

    # `Image` is already imported at module level, so no local import is needed.
    # One uniformly filled 8-bit grayscale image is written per (file name, fill value) pair.
    for file_name, fill_value in (("zero.png", 0), ("full.png", 255)):
        pixels = np.full((2, 2), fill_value, dtype=np.uint8)
        Image.fromarray(pixels, mode="L").save(img_dir / file_name)

    df = build_records_df(
        bases=[img_dir],
        selected_extensions={"png"},
        loader=loader,
    )
    assert isinstance(df, pl.DataFrame)

    paths = df["path"].to_list()
    assert sorted(Path(p).name for p in paths) == ["full.png", "zero.png"]

    assert "mean_intensity" in df.columns

    mip = {Path(p).name: v for p, v in zip(paths, df["mean_intensity"].to_list())}
    assert mip["zero.png"] == 0.0
    assert mip["full.png"] == 255.0
310
+
311
+
312
def test_full_records_df_handles_5d_tif_t_z_c_dimensions(tmp_path, loader):
    """Mean-intensity columns exist and are correct for a 5-D TIFF indexed as (t, c, z, y, x).

    Every (t, c, z) plane is filled with one constant, so the per-plane, per-t, per-c,
    per-z and overall means can be computed directly from that constant.
    """
    t_size, c_size, z_size, y_size, x_size = 2, 3, 4, 2, 2

    def plane_value(t, c, z):
        # Constant intensity written into the whole (t, c, z) plane.
        return (t * z_size + z) * 10 + c * 5

    arr = np.zeros((t_size, c_size, z_size, y_size, x_size), dtype=np.uint8)
    for t in range(t_size):
        for c in range(c_size):
            for z in range(z_size):
                arr[t, c, z, ...] = plane_value(t, c, z)

    path = tmp_path / "5d.tif"
    tifffile.imwrite(str(path), arr, photometric='minisblack')

    df = build_records_df(bases=[tmp_path], selected_extensions={"tif"}, loader=loader)

    # Per-plane columns: one per (t, c, z) combination.
    expected_cols = {
        f"mean_intensity_t{t}_c{c}_z{z}"
        for t in range(t_size) for z in range(z_size) for c in range(c_size)
    }
    assert expected_cols.issubset(set(df.columns))

    actual = {col: df[col][0] for col in expected_cols}
    for t in range(t_size):
        for z in range(z_size):
            for c in range(c_size):
                key = f"mean_intensity_t{t}_c{c}_z{z}"
                expected = plane_value(t, c, z)
                assert actual[key] == expected, f"{key} was {actual[key]}, expected {expected}"

    # Per-t columns: mean over all c and z for a fixed t.
    for t in range(t_size):
        col = f"mean_intensity_t{t}"
        assert col in df.columns
        block_vals = [plane_value(t, c, z) for c in range(c_size) for z in range(z_size)]
        assert df[0, col] == sum(block_vals) / len(block_vals)

    # Per-c columns: mean over all t and z for a fixed c.
    for c in range(c_size):
        col = f"mean_intensity_c{c}"
        assert col in df.columns
        block_vals = [plane_value(t, c, z) for t in range(t_size) for z in range(z_size)]
        assert df[0, col] == sum(block_vals) / len(block_vals)

    # Per-z columns: mean over all t and c for a fixed z.
    for z in range(z_size):
        col = f"mean_intensity_z{z}"
        assert col in df.columns
        block_vals = [plane_value(t, c, z) for t in range(t_size) for c in range(c_size)]
        assert df[0, col] == sum(block_vals) / len(block_vals)

    # Overall mean across every plane.
    assert "mean_intensity" in df.columns
    all_vals = [
        plane_value(t, c, z)
        for t in range(t_size) for c in range(c_size) for z in range(z_size)
    ]
    assert df[0, "mean_intensity"] == sum(all_vals) / len(all_vals)
372
+
373
+
374
def test_full_records_df_handles_png_gray(tmp_path, loader):
    """The overall mean intensity of a PNG equals the uint8-cast mean of its source array.

    Despite the test name, the saved image comes from a 2x2x3 array whose three
    channels hold the constants 10, 20 and 30.
    """
    arr = np.zeros((2, 2, 3), dtype=np.uint8)
    for channel, value in enumerate((10, 20, 30)):
        arr[..., channel] = value

    path = tmp_path / "rgb.png"
    Image.fromarray(arr).save(str(path))

    df = build_records_df(bases=[tmp_path], selected_extensions={"png"}, loader=loader)

    assert "mean_intensity" in df.columns

    # The expected value is the mean over all array elements, cast to uint8.
    expected_gray = np.uint8(np.mean(arr))
    assert df["mean_intensity"][0] == expected_gray
@@ -0,0 +1,99 @@
1
+ from pathlib import Path
2
+
3
+ import numpy as np
4
+ import pytest
5
+ import zarr
6
+ from zarr.storage import LocalStore
7
+ import polars as pl
8
+
9
+ from pixel_patrol_loader_bio.plugins.loaders.bioio_loader import BioIoLoader
10
+ from pixel_patrol_loader_bio.plugins.loaders.zarr_loader import ZarrLoader
11
+ from pixel_patrol_base.core.processing import get_all_record_properties, build_records_df
12
+ from pixel_patrol_base.plugin_registry import discover_processor_plugins
13
+
14
+
15
@pytest.fixture
def zarr_folder(tmp_path: Path) -> Path:
    """
    Write a small Zarr group with NGFF-style ``multiscales`` and ``omero`` attributes to disk.

    The group lives at ``<tmp_path>/project/test_image.zarr`` and holds one uint16 array
    named "0" with shape (1, 2, 1, 10, 10), filled with random values.
    Returns the .zarr folder path.
    """
    zarr_path = tmp_path / "project" / "test_image.zarr"
    zarr_path.parent.mkdir(parents=True, exist_ok=True)

    dtype = "uint16"
    shape = (1, 2, 1, 10, 10)
    data = np.random.randint(0, 65535, size=shape, dtype=dtype)

    # LocalStore is used for compatibility with modern Zarr v3+.
    root = zarr.group(store=LocalStore(str(zarr_path)))

    arr = root.create_array(
        "0",
        shape=shape,
        chunks=(1, 1, 1, 10, 10),
        dtype=dtype,
        overwrite=True,
    )
    arr[:] = data

    # NGFF metadata: five named axes and one label per channel.
    axes = [
        {"name": axis_name, "type": axis_type}
        for axis_name, axis_type in (
            ("t", "time"),
            ("c", "channel"),
            ("z", "space"),
            ("y", "space"),
            ("x", "space"),
        )
    ]
    channels = [{"label": label} for label in ("Channel 0", "Channel 1")]
    root.attrs.put({
        "multiscales": [{
            "version": "0.4",
            "datasets": [{"path": "0"}],
            "axes": axes,
        }],
        "omero": {"channels": channels},
    })

    return zarr_path
65
+
66
def test_zarr_path_recognition_as_image(zarr_folder: Path):
    """
    Test that a .zarr folder is included in the records frame as a single entry
    with type='file' and file_extension='zarr'.
    """
    parent_dir = zarr_folder.parent
    paths_df = build_records_df([parent_dir], selected_extensions='all', loader=ZarrLoader())

    # build_records_df returns None when nothing is found; fail with a clear
    # message here rather than an AttributeError on `.filter` below.
    assert paths_df is not None, "Zarr folder not found in paths_df"

    zarr_rows = paths_df.filter(pl.col("path") == str(zarr_folder))

    assert not zarr_rows.is_empty(), "Zarr folder not found in paths_df"
    assert zarr_rows[0, "type"] == "file", "Zarr folder should be recognized as type 'file'"
    assert zarr_rows[0, "file_extension"] == "zarr", "Zarr folder should have 'zarr' as file_extension"
77
+
78
+
79
# Runs once per loader: the same .zarr folder is read through ZarrLoader and BioIoLoader.
@pytest.mark.parametrize("loader", [ZarrLoader(), BioIoLoader()])
def test_extract_metadata_from_zarr_using_bioio(zarr_folder: Path, loader):
    """
    Test that get_all_record_properties can process a .zarr folder and returns
    the expected dimension, dtype and shape metadata.
    """
    metadata = get_all_record_properties(zarr_folder, loader=loader, processors=discover_processor_plugins())

    assert isinstance(metadata, dict)

    accepted_dim_orders = ["TCZYXS", "TCZYX", "TCYX", "CZYX", "CXY", "TYX"]
    assert metadata.get("dim_order") in accepted_dim_orders  # TODO: probably need to change so dim order is always TCZYXS
    assert metadata.get("dtype") == "uint16"

    # Per-axis sizes must match the (1, 2, 1, 10, 10) array written by the fixture.
    expected_sizes = {"T_size": 1, "C_size": 2, "Z_size": 1, "Y_size": 10, "X_size": 10}
    for size_key, size in expected_sizes.items():
        assert metadata.get(size_key) == size

    assert metadata.get("num_pixels") == 1 * 2 * 1 * 10 * 10
    assert metadata.get("shape") == [1, 2, 1, 10, 10]
    assert metadata.get("ndim") == 5