biwt 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
biwt/__init__.py ADDED
@@ -0,0 +1,14 @@
1
+ """
2
+ biwt — BioInformatics WalkThrough
3
+
4
+ Core public surface:
5
+ from biwt import BiwtInput, BiwtResult, DomainSpec
6
+
7
+ GUI (requires biwt[gui]):
8
+ from biwt.gui import create_biwt_widget
9
+ """
10
+
11
+ from biwt.types import DomainSpec, BiwtInput, BiwtResult
12
+
13
+ __version__ = "0.1.0"
14
+ __all__ = ["DomainSpec", "BiwtInput", "BiwtResult"]
biwt/core/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """biwt.core — data logic with no Qt dependency."""
@@ -0,0 +1,143 @@
1
+ """
2
+ Cell-type configuration logic — purely data, no Qt.
3
+
4
+ The walkthrough gathers user decisions (keep / merge / delete / rename) and
5
+ stores them as ``CellTypeAction`` objects inside a ``CellTypeConfig``.
6
+ ``CellTypeConfig.resolve()`` collapses those decisions into a flat
7
+ original_label → final_name mapping that ``positioning.py`` can consume.
8
+
9
+ ``suggest_name_mappings`` provides lightweight heuristic hints to the GUI
10
+ so it can pre-populate rename fields when Studio cell-type names are available.
11
+ Future: replace / augment with a cell-type registry / ontology lookup.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from dataclasses import dataclass, field
17
+ from typing import Optional
18
+
19
+
20
+ # ---------------------------------------------------------------------------
21
+ # Data classes
22
+ # ---------------------------------------------------------------------------
23
+
24
@dataclass
class CellTypeAction:
    """A single user decision about one cell-type label found in the data.

    Attributes
    ----------
    original_name:
        The label exactly as it occurs in the imported data
        (e.g. ``"CD8+LAG3= T cell"``).
    action:
        ``"keep"``, ``"merge"`` or ``"delete"``.  Defaults to ``"keep"``.
    merge_target:
        ``original_name`` of the cell type this label is merged into.  Only
        meaningful when ``action == "merge"``; ``CellTypeConfig`` follows
        chains of merges transitively.
    final_name:
        Replacement display name.  ``None`` means "use ``original_name``".
    """

    # Field order is part of the positional constructor signature.
    original_name: str
    action: str = field(default="keep")
    merge_target: Optional[str] = field(default=None)
    final_name: Optional[str] = field(default=None)
44
+
45
+
46
@dataclass
class CellTypeConfig:
    """All cell-type decisions collected during one BIWT walkthrough session.

    Example
    -------
    config = CellTypeConfig()
    config.add(CellTypeAction("T cell", action="keep", final_name="tcell"))
    config.add(CellTypeAction("CD8 T cell", action="merge", merge_target="T cell"))
    config.add(CellTypeAction("Unknown", action="delete"))

    config.resolve()
    # → {"T cell": "tcell", "CD8 T cell": "tcell", "Unknown": None}
    """

    # Maps original_name → CellTypeAction.
    actions: dict = field(default_factory=dict)

    def add(self, action: CellTypeAction) -> None:
        """Register *action*, replacing any earlier decision for the same label."""
        self.actions[action.original_name] = action

    def resolve_name(self, original: str, _seen: Optional[set] = None) -> Optional[str]:
        """Return the final name for *original*, or ``None`` when it is deleted.

        Merge chains (A→B→C) are followed to their end.  ``_seen`` collects
        the labels already visited so a merge cycle terminates: the label at
        which the cycle is detected is returned unchanged.
        """
        visited = set() if _seen is None else _seen
        current = original
        while True:
            if current in visited:
                # Cycle guard: stop at the label we have already been through.
                return current
            visited.add(current)

            decision = self.actions.get(current)
            if decision is None:
                # No decision recorded for this label: it keeps its own name.
                return current
            if decision.action == "delete":
                return None
            if decision.action != "merge":
                # "keep" (and any other action value) uses the override if set.
                return decision.final_name or current
            if decision.merge_target is None:
                # A merge without a target leaves the label as it is.
                return current
            current = decision.merge_target

    def resolve(self) -> dict[str, Optional[str]]:
        """Return a flat ``{original_label: final_name | None}`` mapping."""
        flat: dict[str, Optional[str]] = {}
        for label in self.actions:
            flat[label] = self.resolve_name(label)
        return flat

    @property
    def kept_names(self) -> list[str]:
        """Distinct non-deleted final names, in first-seen order."""
        surviving = (name for name in self.resolve().values() if name is not None)
        return list(dict.fromkeys(surviving))
102
+
103
+
104
+ # ---------------------------------------------------------------------------
105
+ # Name-suggestion heuristics
106
+ # ---------------------------------------------------------------------------
107
+
108
def suggest_name_mappings(
    data_labels: list[str],
    host_names: list[str],
) -> dict[str, Optional[str]]:
    """Suggest a Studio cell-type name for each data label.

    Strategy (in priority order):
      1. Exact match (case-insensitive).
      2. Studio name is a substring of the data label (or vice-versa); the
         first host name that matches, in ``host_names`` order, wins.

    Empty or whitespace-only labels and host names take part in exact
    matching only.  They are excluded from the substring pass because the
    empty string is a substring of every string and would otherwise produce
    a meaningless suggestion.

    Parameters
    ----------
    data_labels:
        Cell-type labels found in the imported data.
    host_names:
        Cell-type names known to the host (Studio).

    Returns
    -------
    A dict ``{data_label: studio_name | None}``; ``None`` means no
    suggestion was found.

    This is deliberately simple — good enough for pre-populating the GUI.
    A future version will query a cell-type ontology / registry.
    """
    # Lower-cased host name → original spelling (later duplicates win).
    host_lower = {n.lower(): n for n in host_names}
    # Candidates for the substring pass; blank names would match everything.
    fuzzy_hosts = [(low, name) for low, name in host_lower.items() if low.strip()]

    suggestions: dict[str, Optional[str]] = {}
    for label in data_labels:
        label_lower = label.lower()

        # 1. Exact
        match: Optional[str] = host_lower.get(label_lower)

        # 2. Substring (skipped for blank labels, which are contained in anything)
        if match is None and label_lower.strip():
            match = next(
                (
                    name
                    for low, name in fuzzy_hosts
                    if low in label_lower or label_lower in low
                ),
                None,
            )

        suggestions[label] = match

    return suggestions
@@ -0,0 +1,307 @@
1
+ """
2
+ Unified single-cell data loader.
3
+
4
+ Supported formats
5
+ -----------------
6
+ .h5ad AnnData (requires biwt[anndata])
7
+ .rds Seurat / SingleCellExperiment via rpy2 + anndata2ri (requires biwt[seurat])
8
+ .rda / .rdata R workspace files (same rpy2 requirement; loaded with base::load())
9
+ .csv Flat tabular; spatial coordinates inferred from column names
10
+
11
+ All paths return a ``BiwtData`` object with a common interface so downstream
12
+ core logic never needs to know the source format.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ from dataclasses import dataclass, field
18
+ from pathlib import Path
19
+ from typing import Optional
20
+ import numpy as np
21
+ import pandas as pd
22
+
23
+ from biwt.core.domain import _detect_spatial_location_from_obsm, _detect_spatial_location_from_obs
24
+
25
+
26
+ # ---------------------------------------------------------------------------
27
+ # BiwtData — unified in-memory representation
28
+ # ---------------------------------------------------------------------------
29
+
30
@dataclass
class BiwtData:
    """Format-independent, in-memory view of imported single-cell data.

    Attributes
    ----------
    obs:
        Per-cell metadata table (cluster labels, cell-type columns, ...),
        analogous to ``AnnData.obs``.
    obsm:
        Named coordinate arrays such as
        ``{"spatial": ndarray, "X_umap": ndarray}``, analogous to
        ``AnnData.obsm``.
    spatial_location:
        Human-readable note on where spatial coordinates were found, e.g.
        ``"obsm['spatial']"`` or ``"obs columns 'x', 'y'"``; ``None`` when
        none were found.
    file_path:
        Path the data was loaded from.
    probability_columns:
        ``obs`` columns that look like per-cell-type deconvolution
        probabilities.
    microns_per_pixel:
        Scale factor derived from platform metadata (e.g. 10x Visium
        ``scalefactors``).  ``None`` when not available; ``domain.py`` uses
        it to convert pixel-space coordinates to µm before domain inference.
    """

    obs: pd.DataFrame
    obsm: dict = field(default_factory=dict)
    spatial_location: Optional[str] = None
    file_path: str = ""
    probability_columns: list = field(default_factory=list)
    microns_per_pixel: Optional[float] = None

    @property
    def column_names(self) -> list[str]:
        """Column labels of ``obs`` as a plain list, in table order."""
        return [*self.obs.columns]

    @property
    def has_spatial(self) -> bool:
        """``True`` when a spatial-coordinate location has been recorded."""
        return not (self.spatial_location is None)

    @property
    def n_cells(self) -> int:
        """Number of rows in ``obs``."""
        return len(self.obs)
72
+
73
+
74
+ # ---------------------------------------------------------------------------
75
+ # Errors
76
+ # ---------------------------------------------------------------------------
77
+
78
class LoadError(Exception):
    """Signals that an input file could not be loaded.

    The loaders in this module raise it for unsupported extensions, missing
    optional dependencies and read failures; the message is meant to be
    shown to the user.
    """
80
+
81
+
82
+ # ---------------------------------------------------------------------------
83
+ # Public entry point
84
+ # ---------------------------------------------------------------------------
85
+
86
+ # R file extensions that share the same load path
87
+ _R_EXTENSIONS = {".rds", ".rda", ".rdata"}
88
+
89
+
90
def load(file_path: str) -> BiwtData:
    """Read single-cell data from *file_path* into a ``BiwtData``.

    The loader is chosen from the lower-cased file extension:
      ``.h5ad``             → AnnData
      ``.rds``              → R object saved with ``saveRDS()``
      ``.rda`` / ``.rdata`` → R workspace saved with ``save()``
      ``.csv``              → flat CSV

    Raises
    ------
    LoadError
        On unsupported format or read failure, with an actionable message.
    """
    ext = Path(file_path).suffix.lower()

    if ext == ".h5ad":
        loaded = _load_h5ad(file_path)
    elif ext in _R_EXTENSIONS:
        # All R formats share one loader; it branches on the extension itself.
        loaded = _load_r_file(file_path, ext)
    elif ext == ".csv":
        loaded = _load_csv(file_path)
    else:
        raise LoadError(
            f"Unsupported file extension '{ext}'. "
            "BIWT supports: .h5ad, .rds, .rda, .rdata, .csv"
        )
    return loaded
116
+
117
+
118
+ # ---------------------------------------------------------------------------
119
+ # Format-specific loaders
120
+ # ---------------------------------------------------------------------------
121
+
122
def _load_h5ad(file_path: str) -> BiwtData:
    """Load an ``.h5ad`` file through ``anndata`` and wrap it as ``BiwtData``.

    Raises
    ------
    LoadError
        When ``anndata`` is not installed or the file cannot be read.
    """
    # anndata is an optional extra, so it is imported lazily.
    try:
        import anndata
    except ImportError:
        missing_dependency = (
            "anndata is required for .h5ad files.\n"
            "Install with: pip install biwt[anndata]"
        )
        raise LoadError(missing_dependency)

    try:
        adata = anndata.read_h5ad(file_path)
    except Exception as exc:
        raise LoadError(f"Failed to read '{file_path}' as AnnData: {exc}") from exc

    # Visium files carry a scale factor that lets pixel coordinates be
    # converted to µm; for anything else this is None.
    return _from_anndata_object(
        adata,
        file_path,
        microns_per_pixel=_extract_visium_microns_per_pixel(adata),
    )
137
+
138
+
139
def _load_r_file(file_path: str, suffix: str) -> BiwtData:
    """Load an R object file (.rds, .rda, or .rdata) via rpy2 + anndata2ri.

    .rds files contain a single serialised R object (``saveRDS`` / ``readRDS``).
    .rda / .rdata files are R workspace files that can contain multiple named
    objects (``save`` / ``load``).  We grab the first object in the workspace;
    if the file was produced by a standard Seurat/SCE export workflow it will
    contain exactly one object.

    The object is read exactly once.  Seurat objects are converted by calling
    R's ``as.SingleCellExperiment`` on the already-loaded object rather than
    re-reading the file from R source text built out of *file_path*.  That
    avoids broken R string literals for paths containing quotes or
    backslashes, and avoids picking an unrelated object out of the R global
    environment for workspace files.

    Parameters
    ----------
    file_path:
        Path of the R file to read.
    suffix:
        Lower-cased file extension (``".rds"``, ``".rda"`` or ``".rdata"``).

    Raises
    ------
    LoadError
        When rpy2 / anndata2ri are missing, activation fails, the workspace
        is empty, the object class is unsupported, or reading/conversion
        fails for any other reason.
    """
    # Optional extra: imported lazily so the core package works without R.
    try:
        import anndata2ri
        from rpy2.robjects.packages import importr
        from rpy2.robjects import r as reval
    except ImportError:
        raise LoadError(
            "rpy2 and anndata2ri are required for R files.\n"
            "Install with: pip install biwt[seurat]"
        )
    try:
        anndata2ri.activate()
    except Exception as e:
        raise LoadError(f"anndata2ri activation failed: {e}") from e

    try:
        base = importr("base")

        if suffix == ".rds":
            # readRDS returns the object directly
            robj = base.readRDS(file_path)
        else:
            # load() reads into a private environment; grab the first named object
            env = reval("new.env(parent = emptyenv())")
            base.load(file_path, envir=env)
            obj_names = list(base.ls(env))
            if not obj_names:
                raise LoadError(f"No objects found in R workspace '{file_path}'.")
            robj = env[obj_names[0]]

        classname = tuple(robj.rclass)[0]

        if classname in ("SingleCellExperiment", "SummarizedExperiment"):
            adata = anndata2ri.rpy2py(robj)
        elif classname == "Seurat":
            # Attaching Seurat makes its as.SingleCellExperiment method
            # available; the result is funnelled through the same anndata2ri
            # path as the other classes.
            reval("library(Seurat)")
            to_sce = reval("as.SingleCellExperiment")
            adata = anndata2ri.rpy2py(to_sce(robj))
        else:
            raise LoadError(
                f"R object class '{classname}' is not supported. "
                "Expected: Seurat, SingleCellExperiment, or SummarizedExperiment."
            )
    except LoadError:
        # Our own errors already carry an actionable message.
        raise
    except Exception as e:
        raise LoadError(f"Failed to read '{file_path}' as R object: {e}") from e

    return _from_anndata_object(adata, file_path)
202
+
203
+
204
def _load_csv(file_path: str) -> BiwtData:
    """Load a flat CSV file and wrap it as ``BiwtData``.

    The whole table becomes ``obs``.  When coordinate columns are detected,
    an ``obsm["spatial"]`` array of shape (n_rows, 2) or (n_rows, 3) is
    synthesised from them.

    Raises
    ------
    LoadError
        When the file cannot be parsed as CSV, or when a detected coordinate
        column cannot be converted to floating-point numbers.
    """
    try:
        df = pd.read_csv(file_path)
    except Exception as e:
        raise LoadError(f"Failed to read '{file_path}' as CSV: {e}") from e

    spatial_location = _detect_spatial_location_from_obs(df)
    prob_cols = _find_probability_columns(df)

    # Synthesize obsm["spatial"] from coordinate columns so the dim-red
    # plotter in EditCellTypesWindow can display the spatial scatter plot.
    obsm: dict = {}
    if spatial_location is not None:
        from biwt.core.domain import _find_coord_col
        cols = list(df.columns)
        # NOTE(review): "imagerow" / "imagecol" are not keys of
        # domain._COORD_CANDIDATES, so these fallbacks currently never match.
        x_col = _find_coord_col(cols, "x") or _find_coord_col(cols, "imagerow")
        y_col = _find_coord_col(cols, "y") or _find_coord_col(cols, "imagecol")
        if x_col and y_col:
            axis_cols = [x_col, y_col]
            z_col = _find_coord_col(cols, "z")
            if z_col:
                axis_cols.append(z_col)
            try:
                obsm["spatial"] = np.column_stack(
                    [df[col].to_numpy(float) for col in axis_cols]
                )
            except (TypeError, ValueError) as e:
                # Keep the documented contract of load(): failures surface
                # as LoadError rather than a raw conversion error.
                raise LoadError(
                    f"Coordinate column(s) {axis_cols} in '{file_path}' "
                    f"could not be read as numbers: {e}"
                ) from e

    return BiwtData(
        obs=df,
        obsm=obsm,
        spatial_location=spatial_location,
        file_path=file_path,
        probability_columns=prob_cols,
    )
236
+
237
+
238
+ # ---------------------------------------------------------------------------
239
+ # Internal helpers
240
+ # ---------------------------------------------------------------------------
241
+
242
def _from_anndata_object(
    adata,
    file_path: str,
    microns_per_pixel: Optional[float] = None,
) -> BiwtData:
    """Wrap an in-memory AnnData object as a ``BiwtData``.

    Parameters
    ----------
    adata:
        Object exposing ``obs`` and a mapping-like ``obsm``.
    file_path:
        Path recorded on the result.
    microns_per_pixel:
        Optional scale factor passed through to the result.

    Raises
    ------
    LoadError
        When ``obs`` / ``obsm`` cannot be read from *adata*.
    """
    try:
        cell_table = adata.obs
        coord_arrays = dict(adata.obsm)
    except Exception as exc:
        raise LoadError(f"Could not read obs/obsm from AnnData object: {exc}") from exc

    # obsm takes precedence; obs columns are only consulted as a fallback.
    location = _detect_spatial_location_from_obsm(coord_arrays)
    if not location:
        location = _detect_spatial_location_from_obs(cell_table)

    probability_cols = _find_probability_columns(cell_table)

    return BiwtData(
        obs=cell_table,
        obsm=coord_arrays,
        spatial_location=location,
        file_path=file_path,
        probability_columns=probability_cols,
        microns_per_pixel=microns_per_pixel,
    )
267
+
268
+
269
+ def _extract_visium_microns_per_pixel(adata) -> Optional[float]:
270
+ """Extract the µm/pixel scale factor from 10x Visium AnnData metadata.
271
+
272
+ 10x Visium spots are 55 µm in diameter in the tissue section.
273
+ The fullres pixel diameter is stored in
274
+ ``adata.uns['spatial'][library_id]['scalefactors']['spot_diameter_fullres']``.
275
+
276
+ Returns ``None`` for any non-Visium or missing metadata — callers treat
277
+ ``None`` as "scale unknown; use raw coordinates".
278
+
279
+ Platform-specific notes
280
+ -----------------------
281
+ This currently handles 10x Visium only. Other platforms with known
282
+ physical scales (Xenium, MERFISH, etc.) typically store coordinates
283
+ already in µm and do not need this conversion. Add cases here as
284
+ support for other platforms is added.
285
+ """
286
+ try:
287
+ spatial_meta = adata.uns.get("spatial", {})
288
+ if not spatial_meta:
289
+ return None
290
+ # Take the first library (multi-library arrays are uncommon)
291
+ library_id = next(iter(spatial_meta))
292
+ scalefactors = spatial_meta[library_id].get("scalefactors", {})
293
+ spot_diameter_px = scalefactors.get("spot_diameter_fullres")
294
+ if spot_diameter_px and spot_diameter_px > 0:
295
+ visium_spot_diameter_um = 55.0
296
+ return visium_spot_diameter_um / spot_diameter_px
297
+ except Exception:
298
+ pass
299
+ return None
300
+
301
+
302
+ def _find_probability_columns(obs: pd.DataFrame) -> list[str]:
303
+ """Return obs columns that look like per-cell-type deconvolution probabilities."""
304
+ return [
305
+ col for col in obs.columns
306
+ if col.endswith("_probability") and (obs[col] >= 0).all() and obs[col].sum() > 0
307
+ ]
biwt/core/domain.py ADDED
@@ -0,0 +1,200 @@
1
+ """
2
+ Domain inference logic.
3
+
4
+ Priority order for resolving the final DomainSpec:
5
+ 1. preferred — host-supplied DomainSpec (always wins if provided)
6
+ 2. platform_microns — coordinates already in µm from platform metadata
7
+ (currently: 10x Visium via adata.uns['spatial'] scale factors)
8
+ 3. data_range — min/max of raw coordinate arrays (obsm or obs columns),
9
+ scaled by microns_per_pixel when available
10
+ 4. default — ±500 µm × ±10 µm fallback
11
+
12
+ Public entry point: ``infer_domain(preferred, obs, obsm, microns_per_pixel)``
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ from typing import Optional
18
+ import numpy as np
19
+
20
+ from biwt.types import DomainSpec
21
+
22
+
23
+ # ---------------------------------------------------------------------------
24
+ # Public API
25
+ # ---------------------------------------------------------------------------
26
+
27
def infer_domain(
    preferred: Optional[DomainSpec] = None,
    obs=None,          # pd.DataFrame | None
    obsm: Optional[dict] = None,
    spatial_key: Optional[str] = None,
    microns_per_pixel: Optional[float] = None,
) -> DomainSpec:
    """Return the best available DomainSpec given what the data provides.

    Sources are tried in this order: *preferred*, coordinates in *obsm*,
    coordinate columns in *obs*, then ``DomainSpec.default()``.  A coordinate
    source with zero rows is skipped, because an empty array has no bounding
    box, so the next source (or the default) is used instead.

    Parameters
    ----------
    preferred:
        DomainSpec from the host.  Returned immediately if not ``None``.
    obs:
        AnnData / DataFrame of per-cell metadata.  Checked for x/y(/z) columns
        when obsm yields nothing useful.
    obsm:
        Dict of named coordinate arrays (e.g. ``{"spatial": ndarray, ...}``).
    spatial_key:
        Explicit key to use in ``obsm``.  If ``None``, heuristic search is
        used.  A key that is absent from ``obsm`` is ignored.
    microns_per_pixel:
        Scale factor from platform metadata (e.g. 10x Visium).  When provided,
        raw pixel coordinates are multiplied by this value before computing the
        domain bounding box so the result is in µm, and the result's source is
        ``"platform_microns"``.  ``None`` means coordinates are assumed to
        already be in µm (or the scale is unknown); the source is then
        ``"data_range"``.
    """
    if preferred is not None:
        return preferred

    has_scale = microns_per_pixel is not None
    source = "platform_microns" if has_scale else "data_range"

    # --- try obsm ---------------------------------------------------------
    if obsm is not None:
        key = spatial_key or _find_spatial_key(obsm)
        if key and key in obsm:
            coords = np.asarray(obsm[key], dtype=float)
            # Need a non-empty (n_cells, >=2) array to take min/max over.
            if coords.ndim == 2 and coords.shape[0] > 0 and coords.shape[1] >= 2:
                if has_scale:
                    coords = coords * microns_per_pixel
                return _domain_from_coords(coords, source=source)

    # --- try obs columns --------------------------------------------------
    if obs is not None:
        try:
            cols = list(obs.columns)
        except AttributeError:
            cols = []
        x_col = _find_coord_col(cols, "x")
        y_col = _find_coord_col(cols, "y")
        if x_col and y_col:
            axis_cols = [x_col, y_col]
            z_col = _find_coord_col(cols, "z")
            if z_col:
                axis_cols.append(z_col)
            # Cast all axes together so z gets the same float conversion as x/y.
            xy = np.column_stack([obs[col].values for col in axis_cols]).astype(float)
            if xy.shape[0] > 0:
                if has_scale:
                    xy = xy * microns_per_pixel
                return _domain_from_coords(xy, source=source)

    return DomainSpec.default()
90
+
91
+
92
+ # ---------------------------------------------------------------------------
93
+ # Helpers
94
+ # ---------------------------------------------------------------------------
95
+
96
def _domain_from_coords(coords: np.ndarray, source: str = "data_range") -> DomainSpec:
    """Return the DomainSpec spanning the bounding box of *coords*.

    Columns 0 and 1 of *coords* give the x and y extents.  Column 2, when
    present, gives the z extent; otherwise z defaults to ``-10.0 .. 10.0``.
    *source* is stored on the result unchanged.
    """
    x_values = coords[:, 0]
    y_values = coords[:, 1]

    if coords.shape[1] >= 3:
        z_values = coords[:, 2]
        z_low, z_high = float(z_values.min()), float(z_values.max())
    else:
        # No z data: fall back to a thin default slab.
        z_low, z_high = -10.0, 10.0

    return DomainSpec(
        xmin=float(x_values.min()),
        xmax=float(x_values.max()),
        ymin=float(y_values.min()),
        ymax=float(y_values.max()),
        zmin=z_low,
        zmax=z_high,
        source=source,
    )
106
+
107
+
108
+ def _find_spatial_key(obsm: dict) -> Optional[str]:
109
+ """Return the first obsm key that looks like spatial coordinates."""
110
+ priority = ["spatial", "X_spatial", "spatial_coords"]
111
+ for p in priority:
112
+ if p in obsm:
113
+ return p
114
+ for key in obsm:
115
+ kl = key.lower()
116
+ if "spatial" in kl or "coord" in kl:
117
+ return key
118
+ return None
119
+
120
+
121
+ _COORD_CANDIDATES: dict[str, list[str]] = {
122
+ "x": ["x", "x_coord", "coord_x", "spatial_x", "x_centroid", "cell_x"],
123
+ "y": ["y", "y_coord", "coord_y", "spatial_y", "y_centroid", "cell_y"],
124
+ "z": ["z", "z_coord", "coord_z", "spatial_z", "z_centroid", "cell_z"],
125
+ }
126
+
127
+
128
+ def _find_coord_col(columns: list[str], axis: str) -> Optional[str]:
129
+ """Case-insensitive search for a spatial axis column."""
130
+ cols_lower = {c.lower(): c for c in columns}
131
+ for candidate in _COORD_CANDIDATES.get(axis, []):
132
+ if candidate in cols_lower:
133
+ return cols_lower[candidate]
134
+ return None
135
+
136
+
137
+ # ---------------------------------------------------------------------------
138
+ # Domain-mismatch classification
139
+ # ---------------------------------------------------------------------------
140
+
141
def classify_domain_mismatch(
    data: DomainSpec,
    preferred: DomainSpec,
) -> Optional[str]:
    """Describe how the data's extent relates to the preferred domain.

    Only the x/y extents are compared.

    Returns
    -------
    "outside"
        At least one data boundary lies beyond the preferred domain — those
        cells would be excluded from the simulation.
    "small"
        The data fits inside the preferred domain but spans < 50% of its
        width or height, or < 50% of its 2-D area — cells would be very
        sparse.
    None
        No significant mismatch, or the preferred domain has zero width or
        height (so no ratio can be judged).
    """
    within_x = preferred.xmin <= data.xmin and data.xmax <= preferred.xmax
    within_y = preferred.ymin <= data.ymin and data.ymax <= preferred.ymax
    if not (within_x and within_y):
        return "outside"

    # A degenerate preferred domain gives nothing to compare coverage against.
    if preferred.width == 0 or preferred.height == 0:
        return None

    too_narrow = data.width < 0.5 * preferred.width
    too_short = data.height < 0.5 * preferred.height
    too_sparse = data.width * data.height < 0.5 * preferred.width * preferred.height
    return "small" if (too_narrow or too_short or too_sparse) else None
173
+
174
+
175
+ # ---------------------------------------------------------------------------
176
+ # Spatial-location description helpers (used by data_loader)
177
+ # ---------------------------------------------------------------------------
178
+
179
def _detect_spatial_location_from_obsm(obsm: dict) -> Optional[str]:
    """Describe, in human-readable form, which obsm entry holds spatial data.

    Returns a string such as ``"obsm['spatial']"``, or ``None`` when no key
    in *obsm* looks spatial.
    """
    spatial_key = _find_spatial_key(obsm)
    return f"obsm['{spatial_key}']" if spatial_key else None
185
+
186
+
187
def _detect_spatial_location_from_obs(obs) -> Optional[str]:
    """Describe, in human-readable form, the spatial columns of an obs table.

    Returns e.g. ``"obs columns 'x', 'y'"`` (with a third name when a z
    column is also found).  ``None`` is returned when *obs* has no
    ``columns`` attribute or lacks an x or y column.
    """
    try:
        cols = list(obs.columns)
    except AttributeError:
        # Not a DataFrame-like object.
        return None

    x_col = _find_coord_col(cols, "x")
    y_col = _find_coord_col(cols, "y")
    if not (x_col and y_col):
        return None

    found = [x_col, y_col]
    z_col = _find_coord_col(cols, "z")
    if z_col:
        found.append(z_col)
    return "obs columns " + ", ".join(f"'{name}'" for name in found)
@@ -0,0 +1,15 @@
1
+ """
2
+ biwt.core.parameters — cell-type template library.
3
+
4
+ Current contents
5
+ ----------------
6
+ xml_defaults : Default PhysiCell XML snippets for each config section.
7
+ cell_templates : Named cell-type phenotype templates (PhysiCell XML strings).
8
+
9
+ Future direction
10
+ ----------------
11
+ This subpackage will grow into a versioned cell-type registry / ontology,
12
+ eventually spun out into its own repo for independent version control.
13
+ Templates will be keyed by canonical cell-type names (with aliases) and
14
+ will carry hierarchical relationships (e.g. "CD8+ T cell" ⊂ "T cell").
15
+ """