biwt 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biwt/__init__.py +14 -0
- biwt/core/__init__.py +1 -0
- biwt/core/cell_types.py +143 -0
- biwt/core/data_loader.py +307 -0
- biwt/core/domain.py +200 -0
- biwt/core/parameters/__init__.py +15 -0
- biwt/core/parameters/cell_templates.py +2929 -0
- biwt/core/parameters/xml_defaults.py +113 -0
- biwt/core/positioning.py +173 -0
- biwt/gui/__init__.py +25 -0
- biwt/gui/walkthrough.py +895 -0
- biwt/gui/widgets.py +278 -0
- biwt/gui/windows/__init__.py +23 -0
- biwt/gui/windows/base.py +108 -0
- biwt/gui/windows/cell_counts.py +347 -0
- biwt/gui/windows/cluster_column.py +65 -0
- biwt/gui/windows/edit_cell_types.py +345 -0
- biwt/gui/windows/load_cell_parameters.py +139 -0
- biwt/gui/windows/positions.py +2302 -0
- biwt/gui/windows/rename_cell_types.py +89 -0
- biwt/gui/windows/spatial_query.py +53 -0
- biwt/gui/windows/spot_deconvolution.py +66 -0
- biwt/gui/windows/write_positions.py +186 -0
- biwt/types.py +138 -0
- biwt-0.1.0.dist-info/METADATA +153 -0
- biwt-0.1.0.dist-info/RECORD +28 -0
- biwt-0.1.0.dist-info/WHEEL +5 -0
- biwt-0.1.0.dist-info/top_level.txt +1 -0
biwt/__init__.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""
|
|
2
|
+
biwt — BioInformatics WalkThrough
|
|
3
|
+
|
|
4
|
+
Core public surface:
|
|
5
|
+
from biwt import BiwtInput, BiwtResult, DomainSpec
|
|
6
|
+
|
|
7
|
+
GUI (requires biwt[gui]):
|
|
8
|
+
from biwt.gui import create_biwt_widget
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from biwt.types import DomainSpec, BiwtInput, BiwtResult
|
|
12
|
+
|
|
13
|
+
__version__ = "0.1.0"
|
|
14
|
+
__all__ = ["DomainSpec", "BiwtInput", "BiwtResult"]
|
biwt/core/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""biwt.core — data logic with no Qt dependency."""
|
biwt/core/cell_types.py
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Cell-type configuration logic — purely data, no Qt.
|
|
3
|
+
|
|
4
|
+
The walkthrough gathers user decisions (keep / merge / delete / rename) and
|
|
5
|
+
stores them as ``CellTypeAction`` objects inside a ``CellTypeConfig``.
|
|
6
|
+
``CellTypeConfig.resolve()`` collapses those decisions into a flat
|
|
7
|
+
original_label → final_name mapping that ``positioning.py`` can consume.
|
|
8
|
+
|
|
9
|
+
``suggest_name_mappings`` provides lightweight heuristic hints to the GUI
|
|
10
|
+
so it can pre-populate rename fields when Studio cell-type names are available.
|
|
11
|
+
Future: replace / augment with a cell-type registry / ontology lookup.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
from dataclasses import dataclass, field
|
|
17
|
+
from typing import Optional
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# ---------------------------------------------------------------------------
|
|
21
|
+
# Data classes
|
|
22
|
+
# ---------------------------------------------------------------------------
|
|
23
|
+
|
|
24
|
+
@dataclass
class CellTypeAction:
    """What the user decided to do with one cell-type label from the data.

    Attributes
    ----------
    original_name:
        The label exactly as it appears in the imported data
        (e.g. ``"CD8+LAG3= T cell"``).
    action:
        ``"keep"``, ``"merge"`` or ``"delete"``.  Defaults to ``"keep"``.
    merge_target:
        ``original_name`` of the cell type this label is folded into.
        Only meaningful when ``action == "merge"``.
    final_name:
        Replacement display name.  ``None`` leaves ``original_name`` as is.
    """

    # Raw label from the data; required.
    original_name: str
    # One of "keep" | "merge" | "delete".
    action: str = "keep"
    # Consulted only for action == "merge".
    merge_target: Optional[str] = None
    # None -> fall back to original_name.
    final_name: Optional[str] = None
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@dataclass
class CellTypeConfig:
    """Complete cell-type decision set for one BIWT walkthrough session.

    Usage
    -----
        config = CellTypeConfig()
        config.add(CellTypeAction("T cell", action="keep", final_name="tcell"))
        config.add(CellTypeAction("CD8 T cell", action="merge", merge_target="T cell"))
        config.add(CellTypeAction("Unknown", action="delete"))

        mapping = config.resolve()
        # → {"T cell": "tcell", "CD8 T cell": "tcell", "Unknown": None}
    """

    # original_name → CellTypeAction; a later ``add`` for the same name wins.
    actions: dict[str, CellTypeAction] = field(default_factory=dict)

    def add(self, action: CellTypeAction) -> None:
        """Register *action*, replacing any earlier decision for the same label."""
        self.actions[action.original_name] = action

    def resolve_name(self, original: str, _seen: Optional[set] = None) -> Optional[str]:
        """Return the final cell-type name for *original*, or ``None`` if deleted.

        Handles transitive merges (A→B→C) and detects cycles defensively.

        Parameters
        ----------
        original:
            Label to resolve.  Labels without a registered action resolve to
            themselves.
        _seen:
            Internal recursion state (labels already visited); callers should
            leave it unset.

        Returns
        -------
        The resolved name, or ``None`` when the label (or the label it is
        merged into) is deleted.
        """
        if _seen is None:
            _seen = set()
        if original in _seen:
            # Cycle guard — fall back to the label at which the loop closed.
            return original
        _seen.add(original)

        decision = self.actions.get(original)
        if decision is None:
            return original
        if decision.action == "delete":
            return None
        if decision.action == "merge":
            # A missing *or blank* target means there is nothing to merge
            # into.  Blank strings are treated as unset, matching how
            # ``final_name`` is handled below, so we never emit "" as a name.
            if not decision.merge_target:
                return original
            return self.resolve_name(decision.merge_target, _seen)
        # "keep" (and any unrecognised action): apply the optional rename.
        return decision.final_name if decision.final_name else original

    def resolve(self) -> dict[str, Optional[str]]:
        """Build a flat ``{original_label: final_name | None}`` mapping."""
        return {name: self.resolve_name(name) for name in self.actions}

    @property
    def kept_names(self) -> list[str]:
        """Unique final names that are not deleted, in first-seen order."""
        survivors = (name for name in self.resolve().values() if name is not None)
        # dict.fromkeys de-duplicates while preserving insertion order.
        return list(dict.fromkeys(survivors))
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
# ---------------------------------------------------------------------------
|
|
105
|
+
# Name-suggestion heuristics
|
|
106
|
+
# ---------------------------------------------------------------------------
|
|
107
|
+
|
|
108
|
+
def suggest_name_mappings(
    data_labels: list[str],
    host_names: list[str],
) -> dict[str, Optional[str]]:
    """Suggest a Studio cell-type name for each data label.

    Strategy (in priority order):
      1. Exact match (case-insensitive).
      2. Studio name is a substring of the data label (or vice-versa); the
         first host name (in ``host_names`` order) that qualifies wins.

    Empty strings never take part in substring matching: ``"" in s`` is true
    for every ``s``, so an empty label or host name would otherwise "match"
    whatever candidate happens to come first.

    Parameters
    ----------
    data_labels:
        Cell-type labels found in the imported data.
    host_names:
        Cell-type names already defined by the host (Studio).

    Returns
    -------
    A dict ``{data_label: studio_name | None}``; ``None`` means no suggestion
    was found.

    This is deliberately simple — good enough for pre-populating the GUI.
    A future version will query a cell-type ontology / registry.
    """
    # Lower-cased host name → original spelling (later duplicates win).
    host_lower = {n.lower(): n for n in host_names}
    suggestions: dict[str, Optional[str]] = {}

    for label in data_labels:
        label_lower = label.lower()

        # 1. Exact (case-insensitive) match.
        match: Optional[str] = host_lower.get(label_lower)

        # 2. Substring match in either direction, skipping empty strings.
        if match is None and label_lower:
            match = next(
                (
                    host
                    for host_key, host in host_lower.items()
                    if host_key and (host_key in label_lower or label_lower in host_key)
                ),
                None,
            )

        suggestions[label] = match

    return suggestions
|
biwt/core/data_loader.py
ADDED
|
@@ -0,0 +1,307 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Unified single-cell data loader.
|
|
3
|
+
|
|
4
|
+
Supported formats
|
|
5
|
+
-----------------
|
|
6
|
+
.h5ad AnnData (requires biwt[anndata])
|
|
7
|
+
.rds Seurat / SingleCellExperiment via rpy2 + anndata2ri (requires biwt[seurat])
|
|
8
|
+
.rda / .rdata R workspace files (same rpy2 requirement; loaded with base::load())
|
|
9
|
+
.csv Flat tabular; spatial coordinates inferred from column names
|
|
10
|
+
|
|
11
|
+
All paths return a ``BiwtData`` object with a common interface so downstream
|
|
12
|
+
core logic never needs to know the source format.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
from dataclasses import dataclass, field
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from typing import Optional
|
|
20
|
+
import numpy as np
|
|
21
|
+
import pandas as pd
|
|
22
|
+
|
|
23
|
+
from biwt.core.domain import _detect_spatial_location_from_obsm, _detect_spatial_location_from_obs
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# ---------------------------------------------------------------------------
|
|
27
|
+
# BiwtData — unified in-memory representation
|
|
28
|
+
# ---------------------------------------------------------------------------
|
|
29
|
+
|
|
30
|
+
@dataclass
class BiwtData:
    """Format-independent container for imported single-cell data.

    Attributes
    ----------
    obs:
        Per-cell metadata table (cluster labels, cell-type columns, ...),
        playing the role of ``AnnData.obs``.
    obsm:
        Named coordinate arrays such as ``{"spatial": ndarray,
        "X_umap": ndarray}``, playing the role of ``AnnData.obsm``.
    spatial_location:
        Human-readable note on where spatial coordinates were found, e.g.
        ``"obsm['spatial']"`` or ``"obs columns 'x', 'y'"``; ``None`` when
        none were found.
    file_path:
        Path the data was loaded from.
    probability_columns:
        ``obs`` columns that look like per-cell-type deconvolution
        probabilities.
    microns_per_pixel:
        Scale factor taken from platform metadata (e.g. 10x Visium
        ``scalefactors``), or ``None`` when unavailable.
    """

    obs: pd.DataFrame
    obsm: dict = field(default_factory=dict)
    spatial_location: Optional[str] = None
    file_path: str = ""
    probability_columns: list = field(default_factory=list)
    microns_per_pixel: Optional[float] = None

    @property
    def column_names(self) -> list[str]:
        """Column labels of ``obs`` as a plain list."""
        return [*self.obs.columns]

    @property
    def has_spatial(self) -> bool:
        """``True`` when a spatial-coordinate location was recorded."""
        return self.spatial_location is not None

    @property
    def n_cells(self) -> int:
        """Number of rows in ``obs``."""
        return len(self.obs.index)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
# ---------------------------------------------------------------------------
|
|
75
|
+
# Errors
|
|
76
|
+
# ---------------------------------------------------------------------------
|
|
77
|
+
|
|
78
|
+
class LoadError(Exception):
    """Error raised by the loaders in this module when a file cannot be loaded.

    The message is meant to be shown to the user as-is.
    """
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
# ---------------------------------------------------------------------------
|
|
83
|
+
# Public entry point
|
|
84
|
+
# ---------------------------------------------------------------------------
|
|
85
|
+
|
|
86
|
+
# R file extensions that share the same load path
|
|
87
|
+
_R_EXTENSIONS = {".rds", ".rda", ".rdata"}
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def load(file_path: str) -> BiwtData:
    """Read single-cell data from *file_path* into a ``BiwtData``.

    The loader is chosen from the (case-insensitive) file extension:
        ``.h5ad``              → AnnData
        ``.rds``               → R object saved with ``saveRDS()``
        ``.rda`` / ``.rdata``  → R workspace saved with ``save()``
        ``.csv``               → flat CSV

    Raises
    ------
    LoadError
        When the extension is not one of the above; the format-specific
        loaders raise it as well when reading fails.
    """
    extension = Path(file_path).suffix.lower()

    # The branches are mutually exclusive, so their order is irrelevant.
    if extension in _R_EXTENSIONS:
        return _load_r_file(file_path, extension)
    if extension == ".csv":
        return _load_csv(file_path)
    if extension == ".h5ad":
        return _load_h5ad(file_path)

    raise LoadError(
        f"Unsupported file extension '{extension}'. "
        "BIWT supports: .h5ad, .rds, .rda, .rdata, .csv"
    )
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
# ---------------------------------------------------------------------------
|
|
119
|
+
# Format-specific loaders
|
|
120
|
+
# ---------------------------------------------------------------------------
|
|
121
|
+
|
|
122
|
+
def _load_h5ad(file_path: str) -> BiwtData:
    """Read an ``.h5ad`` file with ``anndata`` and wrap it in a ``BiwtData``.

    Raises
    ------
    LoadError
        If ``anndata`` is not installed, or if ``anndata.read_h5ad`` fails
        for any reason (the original exception is chained).
    """
    # anndata is an optional dependency, so it is imported lazily.
    try:
        import anndata
    except ImportError:
        raise LoadError(
            "anndata is required for .h5ad files.\n"
            "Install with: pip install biwt[anndata]"
        )

    try:
        adata = anndata.read_h5ad(file_path)
    except Exception as exc:
        raise LoadError(f"Failed to read '{file_path}' as AnnData: {exc}") from exc

    # The Visium scale factor (if any) is looked up before the object is
    # converted to a BiwtData.
    scale = _extract_visium_microns_per_pixel(adata)
    return _from_anndata_object(adata, file_path, microns_per_pixel=scale)
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def _load_r_file(file_path: str, suffix: str) -> BiwtData:
    """Load an R object file (.rds, .rda, or .rdata) via rpy2 + anndata2ri.

    .rds files contain a single serialised R object (``saveRDS`` / ``readRDS``).
    .rda / .rdata files are R workspace files that can contain multiple named
    objects (``save`` / ``load``).  We take the first name returned by R's
    ``ls()`` on the loaded workspace; if the file was produced by a standard
    Seurat/SCE export workflow it will contain exactly one object.

    Parameters
    ----------
    file_path:
        Path of the file to read.
    suffix:
        Lower-cased file extension; ``".rds"`` selects ``readRDS``, anything
        else is treated as a workspace and read with ``load``.

    Raises
    ------
    LoadError
        If rpy2 / anndata2ri are missing, activation fails, the workspace is
        empty, the object's class is unsupported, or any step of reading or
        conversion raises.

    NOTE(review): ``anndata2ri.activate()`` is never undone here, so the
    conversion presumably stays active for the rest of the process — confirm
    this is intended.
    """
    try:
        import anndata2ri
        from rpy2.robjects.packages import importr
        from rpy2.robjects import r as reval
    except ImportError:
        raise LoadError(
            "rpy2 and anndata2ri are required for R files.\n"
            "Install with: pip install biwt[seurat]"
        )
    try:
        anndata2ri.activate()
    except Exception as e:
        raise LoadError(f"anndata2ri activation failed: {e}") from e

    try:
        base = importr("base")

        if suffix == ".rds":
            # readRDS returns the object directly
            robj = base.readRDS(file_path)
        else:
            # load() reads into an environment; grab the first named object
            env = reval("new.env(parent = emptyenv())")
            base.load(file_path, envir=env)
            obj_names = list(base.ls(env))
            if not obj_names:
                raise LoadError(f"No objects found in R workspace '{file_path}'.")
            robj = env[obj_names[0]]

        classname = tuple(robj.rclass)[0]

        if classname in ("SingleCellExperiment", "SummarizedExperiment"):
            adata = anndata2ri.rpy2py(robj)
        elif classname == "Seurat":
            reval("library(Seurat)")
            # Re-read via R string evaluation so Seurat's conversion method is
            # available; funnel through the same anndata2ri path afterwards.
            #
            # The path is embedded in R source, so it must be a valid R string
            # literal: escape backslashes (Windows paths) and double quotes.
            r_path = '"' + str(file_path).replace("\\", "\\\\").replace('"', '\\"') + '"'
            if suffix == ".rds":
                r_source = f"readRDS({r_path})"
            else:
                # Load into a private environment and take the same first
                # object as above.  Loading into the global environment and
                # using a bare ``ls()[1]`` could pick up an unrelated (or
                # stale) global object instead of the one from this file.
                r_source = (
                    "local({ e <- new.env(parent = emptyenv()); "
                    f"load({r_path}, envir = e); "
                    "get(ls(e)[1], envir = e) })"
                )
            # Single expression: nothing is left behind in R's global env.
            adata = reval(f"as.SingleCellExperiment({r_source})")
            adata = anndata2ri.rpy2py(adata)
        else:
            raise LoadError(
                f"R object class '{classname}' is not supported. "
                "Expected: Seurat, SingleCellExperiment, or SummarizedExperiment."
            )
    except LoadError:
        # Already user-facing; do not re-wrap.
        raise
    except Exception as e:
        raise LoadError(f"Failed to read '{file_path}' as R object: {e}") from e

    return _from_anndata_object(adata, file_path)
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def _load_csv(file_path: str) -> BiwtData:
    """Read a flat CSV into a ``BiwtData``, treating every column as ``obs``.

    When x/y (and optionally z) coordinate columns are detected, they are also
    copied into ``obsm["spatial"]`` as a float array with one row per cell.

    Raises
    ------
    LoadError
        If pandas cannot read the file, or if the detected coordinate columns
        cannot be converted to floats.
    """
    try:
        df = pd.read_csv(file_path)
    except Exception as e:
        raise LoadError(f"Failed to read '{file_path}' as CSV: {e}") from e

    spatial_location = _detect_spatial_location_from_obs(df)
    prob_cols = _find_probability_columns(df)

    # Synthesize obsm["spatial"] from coordinate columns so the dim-red
    # plotter in EditCellTypesWindow can display the spatial scatter plot.
    obsm: dict = {}
    if spatial_location is not None:
        from biwt.core.domain import _find_coord_col
        cols = list(df.columns)
        # NOTE(review): the "imagerow" / "imagecol" fallbacks only take effect
        # if _find_coord_col knows those axis names — verify against
        # _COORD_CANDIDATES in biwt.core.domain.
        x_col = _find_coord_col(cols, "x") or _find_coord_col(cols, "imagerow")
        y_col = _find_coord_col(cols, "y") or _find_coord_col(cols, "imagecol")
        if x_col and y_col:
            coord_cols = [x_col, y_col]
            z_col = _find_coord_col(cols, "z")
            if z_col:
                coord_cols.append(z_col)
            try:
                obsm["spatial"] = np.column_stack(
                    [df[name].to_numpy(float) for name in coord_cols]
                )
            except (TypeError, ValueError) as e:
                # A column can be *named* like a coordinate yet hold text or
                # missing-value markers; report that as a load failure rather
                # than leaking a raw conversion error to the caller.
                raise LoadError(
                    f"Coordinate columns {coord_cols} in '{file_path}' could not "
                    f"be converted to numbers: {e}"
                ) from e

    return BiwtData(
        obs=df,
        obsm=obsm,
        spatial_location=spatial_location,
        file_path=file_path,
        probability_columns=prob_cols,
    )
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
# ---------------------------------------------------------------------------
|
|
239
|
+
# Internal helpers
|
|
240
|
+
# ---------------------------------------------------------------------------
|
|
241
|
+
|
|
242
|
+
def _from_anndata_object(
    adata,
    file_path: str,
    microns_per_pixel: Optional[float] = None,
) -> BiwtData:
    """Wrap an in-memory AnnData-like object in a ``BiwtData``.

    Parameters
    ----------
    adata:
        Object exposing ``obs`` and a mapping-like ``obsm``.
    file_path:
        Stored unchanged on the result.
    microns_per_pixel:
        Stored unchanged on the result.

    Raises
    ------
    LoadError
        If ``obs`` / ``obsm`` cannot be read from *adata*.
    """
    try:
        obs, obsm = adata.obs, dict(adata.obsm)
    except Exception as exc:
        raise LoadError(f"Could not read obs/obsm from AnnData object: {exc}") from exc

    # obsm is consulted first; obs columns are only the fallback.
    spatial_loc = _detect_spatial_location_from_obsm(obsm)
    if not spatial_loc:
        spatial_loc = _detect_spatial_location_from_obs(obs)

    return BiwtData(
        obs=obs,
        obsm=obsm,
        spatial_location=spatial_loc,
        file_path=file_path,
        probability_columns=_find_probability_columns(obs),
        microns_per_pixel=microns_per_pixel,
    )
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def _extract_visium_microns_per_pixel(adata) -> Optional[float]:
|
|
270
|
+
"""Extract the µm/pixel scale factor from 10x Visium AnnData metadata.
|
|
271
|
+
|
|
272
|
+
10x Visium spots are 55 µm in diameter in the tissue section.
|
|
273
|
+
The fullres pixel diameter is stored in
|
|
274
|
+
``adata.uns['spatial'][library_id]['scalefactors']['spot_diameter_fullres']``.
|
|
275
|
+
|
|
276
|
+
Returns ``None`` for any non-Visium or missing metadata — callers treat
|
|
277
|
+
``None`` as "scale unknown; use raw coordinates".
|
|
278
|
+
|
|
279
|
+
Platform-specific notes
|
|
280
|
+
-----------------------
|
|
281
|
+
This currently handles 10x Visium only. Other platforms with known
|
|
282
|
+
physical scales (Xenium, MERFISH, etc.) typically store coordinates
|
|
283
|
+
already in µm and do not need this conversion. Add cases here as
|
|
284
|
+
support for other platforms is added.
|
|
285
|
+
"""
|
|
286
|
+
try:
|
|
287
|
+
spatial_meta = adata.uns.get("spatial", {})
|
|
288
|
+
if not spatial_meta:
|
|
289
|
+
return None
|
|
290
|
+
# Take the first library (multi-library arrays are uncommon)
|
|
291
|
+
library_id = next(iter(spatial_meta))
|
|
292
|
+
scalefactors = spatial_meta[library_id].get("scalefactors", {})
|
|
293
|
+
spot_diameter_px = scalefactors.get("spot_diameter_fullres")
|
|
294
|
+
if spot_diameter_px and spot_diameter_px > 0:
|
|
295
|
+
visium_spot_diameter_um = 55.0
|
|
296
|
+
return visium_spot_diameter_um / spot_diameter_px
|
|
297
|
+
except Exception:
|
|
298
|
+
pass
|
|
299
|
+
return None
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
def _find_probability_columns(obs: pd.DataFrame) -> list[str]:
|
|
303
|
+
"""Return obs columns that look like per-cell-type deconvolution probabilities."""
|
|
304
|
+
return [
|
|
305
|
+
col for col in obs.columns
|
|
306
|
+
if col.endswith("_probability") and (obs[col] >= 0).all() and obs[col].sum() > 0
|
|
307
|
+
]
|
biwt/core/domain.py
ADDED
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Domain inference logic.
|
|
3
|
+
|
|
4
|
+
Priority order for resolving the final DomainSpec:
|
|
5
|
+
1. preferred — host-supplied DomainSpec (always wins if provided)
|
|
6
|
+
2. platform_microns — coordinates already in µm from platform metadata
|
|
7
|
+
(currently: 10x Visium via adata.uns['spatial'] scale factors)
|
|
8
|
+
3. data_range — min/max of raw coordinate arrays (obsm or obs columns),
|
|
9
|
+
used unscaled when no microns_per_pixel is available
|
|
10
|
+
4. default — ±500 µm × ±10 µm fallback
|
|
11
|
+
|
|
12
|
+
Public entry point: ``infer_domain(preferred, obs, obsm, spatial_key, microns_per_pixel)``
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
from typing import Optional
|
|
18
|
+
import numpy as np
|
|
19
|
+
|
|
20
|
+
from biwt.types import DomainSpec
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# ---------------------------------------------------------------------------
|
|
24
|
+
# Public API
|
|
25
|
+
# ---------------------------------------------------------------------------
|
|
26
|
+
|
|
27
|
+
def infer_domain(
    preferred: Optional[DomainSpec] = None,
    obs=None,                      # pd.DataFrame | None
    obsm: Optional[dict] = None,
    spatial_key: Optional[str] = None,
    microns_per_pixel: Optional[float] = None,
) -> DomainSpec:
    """Return the best available DomainSpec given what the data provides.

    Sources are tried in this order; the first usable one wins:
      1. ``preferred`` (returned untouched);
      2. a coordinate array in ``obsm``;
      3. x / y (/ z) columns in ``obs``;
      4. ``DomainSpec.default()``.

    A coordinate source with zero rows is treated as unusable and skipped,
    because a bounding box of no points is undefined.

    Parameters
    ----------
    preferred:
        DomainSpec from the host.  Returned immediately if not ``None``.
    obs:
        AnnData / DataFrame of per-cell metadata.  Checked for x/y(/z) columns
        when obsm yields nothing useful.
    obsm:
        Dict of named coordinate arrays (e.g. ``{"spatial": ndarray, ...}``).
        Only arrays with 2 dimensions and at least 2 columns are used.
    spatial_key:
        Explicit key to use in ``obsm``.  If ``None``, heuristic search is used.
    microns_per_pixel:
        Scale factor from platform metadata (e.g. 10x Visium).  When provided,
        raw pixel coordinates are multiplied by this value before computing the
        domain bounding box and the result is tagged ``"platform_microns"``.
        ``None`` leaves coordinates unscaled and tags the result
        ``"data_range"``.
    """
    if preferred is not None:
        return preferred

    # --- try obsm ---------------------------------------------------------
    if obsm is not None:
        key = spatial_key or _find_spatial_key(obsm)
        if key and key in obsm:
            coords = np.asarray(obsm[key], dtype=float)
            if coords.ndim == 2 and coords.shape[0] > 0 and coords.shape[1] >= 2:
                return _scaled_domain(coords, microns_per_pixel)

    # --- try obs columns --------------------------------------------------
    if obs is not None:
        try:
            cols = list(obs.columns)
        except AttributeError:
            # Not DataFrame-like: nothing to search.
            cols = []
        x_col = _find_coord_col(cols, "x")
        y_col = _find_coord_col(cols, "y")
        if x_col and y_col:
            axes = [obs[x_col].values, obs[y_col].values]
            z_col = _find_coord_col(cols, "z")
            if z_col:
                axes.append(obs[z_col].values)
            # Cast all axes (z included) to float in one step.
            xyz = np.column_stack(axes).astype(float)
            if xyz.shape[0] > 0:
                return _scaled_domain(xyz, microns_per_pixel)

    return DomainSpec.default()


def _scaled_domain(coords: np.ndarray, microns_per_pixel: Optional[float]) -> DomainSpec:
    """Apply the optional pixel→µm scale and return the bounding-box DomainSpec."""
    if microns_per_pixel is None:
        return _domain_from_coords(coords, source="data_range")
    return _domain_from_coords(coords * microns_per_pixel, source="platform_microns")
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
# ---------------------------------------------------------------------------
|
|
93
|
+
# Helpers
|
|
94
|
+
# ---------------------------------------------------------------------------
|
|
95
|
+
|
|
96
|
+
def _domain_from_coords(coords: np.ndarray, source: str = "data_range") -> DomainSpec:
    """Return the axis-aligned bounding box of *coords* as a ``DomainSpec``.

    Column 0 is x, column 1 is y and, when present, column 2 is z.  Without a
    third column the z range is fixed to ``[-10.0, 10.0]``.  *source* is
    passed through to the ``DomainSpec`` unchanged.
    """
    x_values = coords[:, 0]
    y_values = coords[:, 1]
    bounds = {
        "xmin": float(x_values.min()),
        "xmax": float(x_values.max()),
        "ymin": float(y_values.min()),
        "ymax": float(y_values.max()),
    }

    if coords.shape[1] >= 3:
        z_values = coords[:, 2]
        bounds["zmin"] = float(z_values.min())
        bounds["zmax"] = float(z_values.max())
    else:
        # 2-D data: use the fixed z slab.
        bounds["zmin"], bounds["zmax"] = -10.0, 10.0

    return DomainSpec(source=source, **bounds)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _find_spatial_key(obsm: dict) -> Optional[str]:
|
|
109
|
+
"""Return the first obsm key that looks like spatial coordinates."""
|
|
110
|
+
priority = ["spatial", "X_spatial", "spatial_coords"]
|
|
111
|
+
for p in priority:
|
|
112
|
+
if p in obsm:
|
|
113
|
+
return p
|
|
114
|
+
for key in obsm:
|
|
115
|
+
kl = key.lower()
|
|
116
|
+
if "spatial" in kl or "coord" in kl:
|
|
117
|
+
return key
|
|
118
|
+
return None
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
_COORD_CANDIDATES: dict[str, list[str]] = {
|
|
122
|
+
"x": ["x", "x_coord", "coord_x", "spatial_x", "x_centroid", "cell_x"],
|
|
123
|
+
"y": ["y", "y_coord", "coord_y", "spatial_y", "y_centroid", "cell_y"],
|
|
124
|
+
"z": ["z", "z_coord", "coord_z", "spatial_z", "z_centroid", "cell_z"],
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def _find_coord_col(columns: list[str], axis: str) -> Optional[str]:
|
|
129
|
+
"""Case-insensitive search for a spatial axis column."""
|
|
130
|
+
cols_lower = {c.lower(): c for c in columns}
|
|
131
|
+
for candidate in _COORD_CANDIDATES.get(axis, []):
|
|
132
|
+
if candidate in cols_lower:
|
|
133
|
+
return cols_lower[candidate]
|
|
134
|
+
return None
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
# ---------------------------------------------------------------------------
|
|
138
|
+
# Domain-mismatch classification
|
|
139
|
+
# ---------------------------------------------------------------------------
|
|
140
|
+
|
|
141
|
+
def classify_domain_mismatch(
    data: DomainSpec,
    preferred: DomainSpec,
) -> Optional[str]:
    """Describe how the data's x/y extent relates to the preferred domain.

    Only the x and y bounds are compared; z is not considered.

    Returns
    -------
    "outside"
        At least one x or y bound of *data* lies beyond *preferred* — those
        cells would be excluded from the simulation.
    "small"
        *data* fits inside *preferred* but spans less than 50% of its width,
        less than 50% of its height, or less than 50% of its 2-D area —
        cells would be very sparse.
    None
        No significant mismatch, or *preferred* has zero width or height
        (so no ratio can be formed).
    """
    x_contained = preferred.xmin <= data.xmin and data.xmax <= preferred.xmax
    y_contained = preferred.ymin <= data.ymin and data.ymax <= preferred.ymax
    if not (x_contained and y_contained):
        return "outside"

    # A degenerate preferred domain gives no meaningful coverage ratio.
    if preferred.width == 0 or preferred.height == 0:
        return None

    too_narrow = data.width < 0.5 * preferred.width
    too_short = data.height < 0.5 * preferred.height
    too_little_area = (
        data.width * data.height < 0.5 * preferred.width * preferred.height
    )
    if too_narrow or too_short or too_little_area:
        return "small"
    return None
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
# ---------------------------------------------------------------------------
|
|
176
|
+
# Spatial-location description helpers (used by data_loader)
|
|
177
|
+
# ---------------------------------------------------------------------------
|
|
178
|
+
|
|
179
|
+
def _detect_spatial_location_from_obsm(obsm: dict) -> Optional[str]:
    """Describe, in human-readable form, which obsm entry holds spatial data.

    Returns a string such as ``"obsm['spatial']"``, or ``None`` when
    ``_find_spatial_key`` finds no suitable key.
    """
    spatial_key = _find_spatial_key(obsm)
    return f"obsm['{spatial_key}']" if spatial_key else None
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def _detect_spatial_location_from_obs(obs) -> Optional[str]:
    """Describe, in human-readable form, the coordinate columns found in *obs*.

    Returns e.g. ``"obs columns 'x', 'y'"`` (with the z column appended when
    one is found).  Returns ``None`` when *obs* has no ``columns`` attribute
    or when an x and a y column cannot both be found.
    """
    try:
        cols = list(obs.columns)
    except AttributeError:
        return None

    x_col = _find_coord_col(cols, "x")
    y_col = _find_coord_col(cols, "y")
    if not (x_col and y_col):
        return None

    found = [x_col, y_col]
    z_col = _find_coord_col(cols, "z")
    if z_col:
        found.append(z_col)
    return "obs columns " + ", ".join(f"'{name}'" for name in found)
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""
|
|
2
|
+
biwt.core.parameters — cell-type template library.
|
|
3
|
+
|
|
4
|
+
Current contents
|
|
5
|
+
----------------
|
|
6
|
+
xml_defaults : Default PhysiCell XML snippets for each config section.
|
|
7
|
+
cell_templates : Named cell-type phenotype templates (PhysiCell XML strings).
|
|
8
|
+
|
|
9
|
+
Future direction
|
|
10
|
+
----------------
|
|
11
|
+
This subpackage will grow into a versioned cell-type registry / ontology,
|
|
12
|
+
eventually spun out into its own repo for independent version control.
|
|
13
|
+
Templates will be keyed by canonical cell-type names (with aliases) and
|
|
14
|
+
will carry hierarchical relationships (e.g. "CD8+ T cell" ⊂ "T cell").
|
|
15
|
+
"""
|