biwt 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. biwt-0.1.0/PKG-INFO +153 -0
  2. biwt-0.1.0/README.md +118 -0
  3. biwt-0.1.0/pyproject.toml +71 -0
  4. biwt-0.1.0/setup.cfg +4 -0
  5. biwt-0.1.0/src/biwt/__init__.py +14 -0
  6. biwt-0.1.0/src/biwt/core/__init__.py +1 -0
  7. biwt-0.1.0/src/biwt/core/cell_types.py +143 -0
  8. biwt-0.1.0/src/biwt/core/data_loader.py +307 -0
  9. biwt-0.1.0/src/biwt/core/domain.py +200 -0
  10. biwt-0.1.0/src/biwt/core/parameters/__init__.py +15 -0
  11. biwt-0.1.0/src/biwt/core/parameters/cell_templates.py +2929 -0
  12. biwt-0.1.0/src/biwt/core/parameters/xml_defaults.py +113 -0
  13. biwt-0.1.0/src/biwt/core/positioning.py +173 -0
  14. biwt-0.1.0/src/biwt/gui/__init__.py +25 -0
  15. biwt-0.1.0/src/biwt/gui/walkthrough.py +895 -0
  16. biwt-0.1.0/src/biwt/gui/widgets.py +278 -0
  17. biwt-0.1.0/src/biwt/gui/windows/__init__.py +23 -0
  18. biwt-0.1.0/src/biwt/gui/windows/base.py +108 -0
  19. biwt-0.1.0/src/biwt/gui/windows/cell_counts.py +347 -0
  20. biwt-0.1.0/src/biwt/gui/windows/cluster_column.py +65 -0
  21. biwt-0.1.0/src/biwt/gui/windows/edit_cell_types.py +345 -0
  22. biwt-0.1.0/src/biwt/gui/windows/load_cell_parameters.py +139 -0
  23. biwt-0.1.0/src/biwt/gui/windows/positions.py +2302 -0
  24. biwt-0.1.0/src/biwt/gui/windows/rename_cell_types.py +89 -0
  25. biwt-0.1.0/src/biwt/gui/windows/spatial_query.py +53 -0
  26. biwt-0.1.0/src/biwt/gui/windows/spot_deconvolution.py +66 -0
  27. biwt-0.1.0/src/biwt/gui/windows/write_positions.py +186 -0
  28. biwt-0.1.0/src/biwt/types.py +138 -0
  29. biwt-0.1.0/src/biwt.egg-info/PKG-INFO +153 -0
  30. biwt-0.1.0/src/biwt.egg-info/SOURCES.txt +32 -0
  31. biwt-0.1.0/src/biwt.egg-info/dependency_links.txt +1 -0
  32. biwt-0.1.0/src/biwt.egg-info/requires.txt +22 -0
  33. biwt-0.1.0/src/biwt.egg-info/top_level.txt +1 -0
  34. biwt-0.1.0/tests/test_session.py +704 -0
biwt-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,153 @@
1
+ Metadata-Version: 2.4
2
+ Name: biwt
3
+ Version: 0.1.0
4
+ Summary: BioInformatics WalkThrough: import single-cell data and generate agent-based model initial conditions
5
+ Author-email: Daniel Bergman <dbergman1@som.umaryland.edu>, Jeanette Johnson <jjohn450@jhmi.edu>, Randy Heiland <heiland@iu.edu>, Paul Macklin <macklinp@iu.edu>, Marwa Naji <mnaji2@jh.edu>
6
+ License: BSD-3-Clause
7
+ Keywords: PhysiCell,bioinformatics,single-cell,initial conditions,agent-based modeling
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Science/Research
10
+ Classifier: License :: OSI Approved :: BSD License
11
+ Classifier: Programming Language :: Python :: 3.9
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
16
+ Requires-Python: >=3.9
17
+ Description-Content-Type: text/markdown
18
+ Requires-Dist: numpy>=1.22
19
+ Requires-Dist: pandas>=1.4
20
+ Provides-Extra: anndata
21
+ Requires-Dist: anndata>=0.9; extra == "anndata"
22
+ Provides-Extra: seurat
23
+ Requires-Dist: anndata>=0.9; extra == "seurat"
24
+ Requires-Dist: anndata2ri>=1.1; extra == "seurat"
25
+ Requires-Dist: rpy2>=3.5; extra == "seurat"
26
+ Provides-Extra: gui
27
+ Requires-Dist: PyQt5>=5.15; extra == "gui"
28
+ Requires-Dist: matplotlib>=3.5; extra == "gui"
29
+ Provides-Extra: all
30
+ Requires-Dist: biwt[anndata,gui,seurat]; extra == "all"
31
+ Provides-Extra: dev
32
+ Requires-Dist: pytest>=7; extra == "dev"
33
+ Requires-Dist: pytest-qt; extra == "dev"
34
+ Requires-Dist: biwt[anndata,gui]; extra == "dev"
35
+
36
+ # BIWT — BioInformatics WalkThrough
37
+
38
+ A guided wizard for importing single-cell bioinformatics data and generating initial conditions for agent-based models (ABMs). Designed as a standalone pip-installable package that can be embedded in any host application. Currently integrated with PhysiCell Studio.
39
+
40
+ ## Installation
41
+
42
+ ```bash
43
+ pip install -e . # core (CSV support only)
44
+ pip install -e ".[anndata]" # + .h5ad support
45
+ pip install -e ".[seurat]" # + .rds/.rda support (requires R + rpy2)
46
+ pip install -e ".[dev]" # + test dependencies
47
+ ```
48
+
49
+ ## Quick Start
50
+
51
+ ```python
52
+ from biwt.gui.walkthrough import create_biwt_widget
53
+ from biwt.types import BiwtInput, DomainSpec
54
+
55
+ domain = DomainSpec(xmin=-500, xmax=500, ymin=-500, ymax=500, units="micron")
56
+ biwt_input = BiwtInput(preferred_domain=domain)
57
+
58
+ def on_complete(result):
59
+ # result.coordinates is a DataFrame with columns: x, y, z, type
60
+ result.to_csv("config/cells.csv")
61
+
62
+ widget = create_biwt_widget(biwt_input, on_complete=on_complete)
63
+ widget.show()
64
+ ```
65
+
66
+ ## Running Tests
67
+
68
+ ```bash
69
+ PYTHONPATH=src python -m pytest tests/ -v
70
+ ```
71
+
72
+ ## Package Structure
73
+
74
+ ```
75
+ src/biwt/
76
+ types.py — Public API: DomainSpec, BiwtInput, BiwtResult
77
+ core/
78
+ data_loader.py — Unified loader (.h5ad, .rds, .csv) → BiwtData
79
+ domain.py — Domain inference + coordinate column detection
80
+ positioning.py — Coordinate scaling + build_ic_dataframe
81
+ cell_types.py — Name-matching heuristics
82
+ parameters/
83
+ cell_templates.py — 29 PhysiCell cell-type XML templates
84
+ xml_defaults.py — Default PhysiCell XML scaffold
85
+ gui/
86
+ walkthrough.py — Session state machine + Qt widget + step logic
87
+ widgets.py — Shared Qt widgets
88
+ windows/ — One file per walkthrough step
89
+ tests/
90
+ test_session.py — 57 tests covering session logic end-to-end
91
+ fixtures/ — CSV test fixtures
92
+ ```
93
+
94
+ ## Key Design Decisions
95
+
96
+ - **No file I/O in BIWT.** The package returns `BiwtResult` in-memory; the host decides how to write.
97
+ - **Pure-Python session.** `WalkthroughSession` has no Qt dependencies. All Qt logic is in window classes.
98
+ - **Single source of truth for steps.** `_step_predicates(session)` defines step ordering. Tests import it directly.
99
+ - **CSV uses `type` header** (not `cell_type`) to match PhysiCell convention.
100
+ - **Domain units.** `DomainSpec.units` defaults to `"micron"` but supports other ABM frameworks.
101
+
102
+ ## Implementation Status
103
+
104
+ ### Completed
105
+
106
+ - [x] Data import: .h5ad, .rds/.rda/.rdata, .csv
107
+ - [x] Spatial coordinate detection (obsm, obs columns, Visium scale factors)
108
+ - [x] CSV spatial synthesis (x/y/z obs columns → obsm["spatial"])
109
+ - [x] Domain inference with priority chain (preferred > platform > data_range > default)
110
+ - [x] Domain mismatch: two-tier detection (classify_domain_mismatch: "outside" / "small" / None)
111
+ - [x] DomainEditorDialog auto-triggered at positions window open (not import time)
112
+ - [x] Context-sensitive mismatch header; no header for manual "Domain Settings…" open
113
+ - [x] domain_accepted flag prevents re-trigger on back/forward navigation
114
+ - [x] BiwtInput.domain_accepted + "Skip domain validation" checkbox bypass auto-check
115
+ - [x] Z-fields default to ±10 for 2D data in domain editor
116
+ - [x] DomainSpec units field; auto-scale toggle wired into positions step
117
+ - [x] Auto-scale off: raw data bounding box centered at domain center (not identity transform)
118
+ - [x] "Domain Settings…" button in positions plot window for manual domain editing
119
+ - [x] Spot deconvolution query and cell expansion
120
+ - [x] Cluster column selection
121
+ - [x] Spatial data query (use spatial coords or random placement)
122
+ - [x] Edit cell types (keep / merge / delete) with scatter plot and legend
123
+ - [x] Rename cell types with Studio name suggestions and duplicate blocking
124
+ - [x] Cell counts (data counts, confluence, total count modes)
125
+ - [x] Coordinate placement (spatial scaling, random placement)
126
+ - [x] 29 cell parameter templates with XML assembly
127
+ - [x] BiwtResult assembly (coordinates, cell_type_map, domain, XML)
128
+ - [x] Studio bridge (BiwtInput/BiwtResult, _biwt_complete callback)
129
+ - [x] Overwrite/Append/Browse/Cancel dialog for CSV output
130
+ - [x] Append handles extra columns in existing CSV
131
+ - [x] Session reset on reimport
132
+ - [x] Step predicate extraction for testability
133
+ - [x] 57 passing tests
134
+
135
+ ### In Progress
136
+
137
+ - [ ] End-to-end manual testing with Studio
138
+
139
+ ### Remaining
140
+
141
+ - [x] pyproject.toml extras for anndata/seurat/dev dependencies
142
+ - [x] CI pipeline (GitHub Actions, Python 3.9–3.12)
143
+ - [ ] CI: R-dependent tests for `.rds` import (requires provisioning R on CI runners; seurat excluded from `dev` extra for now)
144
+ - [ ] User documentation / help text within wizard steps
145
+ - [ ] Substrate/gene expression pass-through (reserved fields in BiwtResult)
146
+ - [ ] Multi-library Visium support
147
+ - [ ] 3D spatial data support beyond z=0 padding
148
+
149
+ ## Related Documents
150
+
151
+ - [PRD.md](PRD.md) — Product requirements (behavioral specs, acceptance criteria)
152
+ - [progress.md](progress.md) — Session decisions and reasoning
153
+ - [CLAUDE.md](CLAUDE.md) — Claude agent guide for this repo
biwt-0.1.0/README.md ADDED
@@ -0,0 +1,118 @@
1
+ # BIWT — BioInformatics WalkThrough
2
+
3
+ A guided wizard for importing single-cell bioinformatics data and generating initial conditions for agent-based models (ABMs). Designed as a standalone pip-installable package that can be embedded in any host application. Currently integrated with PhysiCell Studio.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ pip install -e . # core (CSV support only)
9
+ pip install -e ".[anndata]" # + .h5ad support
10
+ pip install -e ".[seurat]" # + .rds/.rda support (requires R + rpy2)
11
+ pip install -e ".[dev]" # + test dependencies
12
+ ```
13
+
14
+ ## Quick Start
15
+
16
+ ```python
17
+ from biwt.gui.walkthrough import create_biwt_widget
18
+ from biwt.types import BiwtInput, DomainSpec
19
+
20
+ domain = DomainSpec(xmin=-500, xmax=500, ymin=-500, ymax=500, units="micron")
21
+ biwt_input = BiwtInput(preferred_domain=domain)
22
+
23
+ def on_complete(result):
24
+ # result.coordinates is a DataFrame with columns: x, y, z, type
25
+ result.to_csv("config/cells.csv")
26
+
27
+ widget = create_biwt_widget(biwt_input, on_complete=on_complete)
28
+ widget.show()
29
+ ```
30
+
31
+ ## Running Tests
32
+
33
+ ```bash
34
+ PYTHONPATH=src python -m pytest tests/ -v
35
+ ```
36
+
37
+ ## Package Structure
38
+
39
+ ```
40
+ src/biwt/
41
+ types.py — Public API: DomainSpec, BiwtInput, BiwtResult
42
+ core/
43
+ data_loader.py — Unified loader (.h5ad, .rds, .csv) → BiwtData
44
+ domain.py — Domain inference + coordinate column detection
45
+ positioning.py — Coordinate scaling + build_ic_dataframe
46
+ cell_types.py — Name-matching heuristics
47
+ parameters/
48
+ cell_templates.py — 29 PhysiCell cell-type XML templates
49
+ xml_defaults.py — Default PhysiCell XML scaffold
50
+ gui/
51
+ walkthrough.py — Session state machine + Qt widget + step logic
52
+ widgets.py — Shared Qt widgets
53
+ windows/ — One file per walkthrough step
54
+ tests/
55
+ test_session.py — 57 tests covering session logic end-to-end
56
+ fixtures/ — CSV test fixtures
57
+ ```
58
+
59
+ ## Key Design Decisions
60
+
61
+ - **No file I/O in BIWT.** The package returns `BiwtResult` in-memory; the host decides how to write.
62
+ - **Pure-Python session.** `WalkthroughSession` has no Qt dependencies. All Qt logic is in window classes.
63
+ - **Single source of truth for steps.** `_step_predicates(session)` defines step ordering. Tests import it directly.
64
+ - **CSV uses `type` header** (not `cell_type`) to match PhysiCell convention.
65
+ - **Domain units.** `DomainSpec.units` defaults to `"micron"` but supports other ABM frameworks.
66
+
67
+ ## Implementation Status
68
+
69
+ ### Completed
70
+
71
+ - [x] Data import: .h5ad, .rds/.rda/.rdata, .csv
72
+ - [x] Spatial coordinate detection (obsm, obs columns, Visium scale factors)
73
+ - [x] CSV spatial synthesis (x/y/z obs columns → obsm["spatial"])
74
+ - [x] Domain inference with priority chain (preferred > platform > data_range > default)
75
+ - [x] Domain mismatch: two-tier detection (classify_domain_mismatch: "outside" / "small" / None)
76
+ - [x] DomainEditorDialog auto-triggered at positions window open (not import time)
77
+ - [x] Context-sensitive mismatch header; no header for manual "Domain Settings…" open
78
+ - [x] domain_accepted flag prevents re-trigger on back/forward navigation
79
+ - [x] BiwtInput.domain_accepted + "Skip domain validation" checkbox bypass auto-check
80
+ - [x] Z-fields default to ±10 for 2D data in domain editor
81
+ - [x] DomainSpec units field; auto-scale toggle wired into positions step
82
+ - [x] Auto-scale off: raw data bounding box centered at domain center (not identity transform)
83
+ - [x] "Domain Settings…" button in positions plot window for manual domain editing
84
+ - [x] Spot deconvolution query and cell expansion
85
+ - [x] Cluster column selection
86
+ - [x] Spatial data query (use spatial coords or random placement)
87
+ - [x] Edit cell types (keep / merge / delete) with scatter plot and legend
88
+ - [x] Rename cell types with Studio name suggestions and duplicate blocking
89
+ - [x] Cell counts (data counts, confluence, total count modes)
90
+ - [x] Coordinate placement (spatial scaling, random placement)
91
+ - [x] 29 cell parameter templates with XML assembly
92
+ - [x] BiwtResult assembly (coordinates, cell_type_map, domain, XML)
93
+ - [x] Studio bridge (BiwtInput/BiwtResult, _biwt_complete callback)
94
+ - [x] Overwrite/Append/Browse/Cancel dialog for CSV output
95
+ - [x] Append handles extra columns in existing CSV
96
+ - [x] Session reset on reimport
97
+ - [x] Step predicate extraction for testability
98
+ - [x] 57 passing tests
99
+
100
+ ### In Progress
101
+
102
+ - [ ] End-to-end manual testing with Studio
103
+
104
+ ### Remaining
105
+
106
+ - [x] pyproject.toml extras for anndata/seurat/dev dependencies
107
+ - [x] CI pipeline (GitHub Actions, Python 3.9–3.12)
108
+ - [ ] CI: R-dependent tests for `.rds` import (requires provisioning R on CI runners; seurat excluded from `dev` extra for now)
109
+ - [ ] User documentation / help text within wizard steps
110
+ - [ ] Substrate/gene expression pass-through (reserved fields in BiwtResult)
111
+ - [ ] Multi-library Visium support
112
+ - [ ] 3D spatial data support beyond z=0 padding
113
+
114
+ ## Related Documents
115
+
116
+ - [PRD.md](PRD.md) — Product requirements (behavioral specs, acceptance criteria)
117
+ - [progress.md](progress.md) — Session decisions and reasoning
118
+ - [CLAUDE.md](CLAUDE.md) — Claude agent guide for this repo
@@ -0,0 +1,71 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "biwt"
7
+ version = "0.1.0"
8
+ description = "BioInformatics WalkThrough: import single-cell data and generate agent-based model initial conditions"
9
+ authors = [
10
+ { name = "Daniel Bergman", email = "dbergman1@som.umaryland.edu" },
11
+ { name = "Jeanette Johnson", email = "jjohn450@jhmi.edu" },
12
+ { name = "Randy Heiland", email = "heiland@iu.edu" },
13
+ { name = "Paul Macklin", email = "macklinp@iu.edu" },
14
+ { name = "Marwa Naji", email = "mnaji2@jh.edu"}
15
+ ]
16
+ readme = "README.md"
17
+ requires-python = ">=3.9"
18
+ license = { text = "BSD-3-Clause" }
19
+ keywords = [
20
+ "PhysiCell",
21
+ "bioinformatics",
22
+ "single-cell",
23
+ "initial conditions",
24
+ "agent-based modeling",
25
+ ]
26
+ classifiers = [
27
+ "Development Status :: 3 - Alpha",
28
+ "Intended Audience :: Science/Research",
29
+ "License :: OSI Approved :: BSD License",
30
+ "Programming Language :: Python :: 3.9",
31
+ "Programming Language :: Python :: 3.10",
32
+ "Programming Language :: Python :: 3.11",
33
+ "Programming Language :: Python :: 3.12",
34
+ "Topic :: Scientific/Engineering :: Bio-Informatics",
35
+ ]
36
+
37
+ # Minimal hard dependencies — deliberately lean.
38
+ # Optional data formats and GUI are declared below.
39
+ dependencies = [
40
+ "numpy>=1.22",
41
+ "pandas>=1.4",
42
+ ]
43
+
44
+ [project.optional-dependencies]
45
+ # pip install biwt[anndata] — enables .h5ad import
46
+ anndata = ["anndata>=0.9"]
47
+
48
+ # pip install biwt[seurat] — enables .rds (Seurat / SingleCellExperiment) import
49
+ seurat = ["anndata>=0.9", "anndata2ri>=1.1", "rpy2>=3.5"]
50
+
51
+ # pip install biwt[gui] — enables biwt.gui (PyQt5 walkthrough UI)
52
+ gui = ["PyQt5>=5.15", "matplotlib>=3.5"]
53
+
54
+ # pip install biwt[all] — everything
55
+ all = ["biwt[anndata,seurat,gui]"]
56
+
57
+ # pip install biwt[dev] — development / testing extras
58
+ dev = ["pytest>=7", "pytest-qt", "biwt[anndata,gui]"]
59
+ # TODO: add a CI job (or optional test marker) that installs biwt[seurat] and
60
+ # runs R-dependent tests once a strategy for provisioning R on CI is decided.
61
+
62
+ [tool.setuptools.packages.find]
63
+ where = ["src"]
64
+
65
+ [tool.setuptools.package-data]
66
+ # Include any non-Python assets inside the package (e.g. default XML templates)
67
+ "biwt.core.parameters" = ["*.xml", "*.json"]
68
+
69
+ [tool.pytest.ini_options]
70
+ testpaths = ["tests"]
71
+ addopts = "-v"
biwt-0.1.0/setup.cfg ADDED
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,14 @@
1
+ """
2
+ biwt — BioInformatics WalkThrough
3
+
4
+ Core public surface:
5
+ from biwt import BiwtInput, BiwtResult, DomainSpec
6
+
7
+ GUI (requires biwt[gui]):
8
+ from biwt.gui import create_biwt_widget
9
+ """
10
+
11
+ from biwt.types import DomainSpec, BiwtInput, BiwtResult
12
+
13
+ __version__ = "0.1.0"
14
+ __all__ = ["DomainSpec", "BiwtInput", "BiwtResult"]
@@ -0,0 +1 @@
1
+ """biwt.core — data logic with no Qt dependency."""
@@ -0,0 +1,143 @@
1
+ """
2
+ Cell-type configuration logic — purely data, no Qt.
3
+
4
+ The walkthrough gathers user decisions (keep / merge / delete / rename) and
5
+ stores them as ``CellTypeAction`` objects inside a ``CellTypeConfig``.
6
+ ``CellTypeConfig.resolve()`` collapses those decisions into a flat
7
+ original_label → final_name mapping that ``positioning.py`` can consume.
8
+
9
+ ``suggest_name_mappings`` provides lightweight heuristic hints to the GUI
10
+ so it can pre-populate rename fields when Studio cell-type names are available.
11
+ Future: replace / augment with a cell-type registry / ontology lookup.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from dataclasses import dataclass, field
17
+ from typing import Optional
18
+
19
+
20
+ # ---------------------------------------------------------------------------
21
+ # Data classes
22
+ # ---------------------------------------------------------------------------
23
+
24
@dataclass
class CellTypeAction:
    """One user decision about a single cell-type label found in the data.

    Attributes
    ----------
    original_name:
        The label exactly as it appears in the imported data
        (e.g. ``"CD8+ T cell"``).
    action:
        What to do with the label: ``"keep"``, ``"merge"`` or ``"delete"``.
        Defaults to ``"keep"``.
    merge_target:
        Only meaningful when ``action == "merge"``: the ``original_name`` of
        the cell type this label is folded into.  ``CellTypeConfig`` follows
        chains of merges transitively.
    final_name:
        Optional replacement for the displayed name.  ``None`` means the
        ``original_name`` is used unchanged.
    """

    # Raw label from the data; also the key under which CellTypeConfig
    # stores this action.
    original_name: str
    # One of "keep" | "merge" | "delete".
    action: str = "keep"
    # Name of the label to merge into (used only for action == "merge").
    merge_target: Optional[str] = None
    # Display-name override; None falls back to original_name.
    final_name: Optional[str] = None
44
+
45
+
46
+ @dataclass
47
+ class CellTypeConfig:
48
+ """Complete cell-type decision set for one BIWT walkthrough session.
49
+
50
+ Usage
51
+ -----
52
+ config = CellTypeConfig()
53
+ config.add(CellTypeAction("T cell", action="keep", final_name="tcell"))
54
+ config.add(CellTypeAction("CD8 T cell", action="merge", merge_target="T cell"))
55
+ config.add(CellTypeAction("Unknown", action="delete"))
56
+
57
+ mapping = config.resolve()
58
+ # → {"T cell": "tcell", "CD8 T cell": "tcell", "Unknown": None}
59
+ """
60
+ actions: dict = field(default_factory=dict) # original_name → CellTypeAction
61
+
62
+ def add(self, action: CellTypeAction) -> None:
63
+ self.actions[action.original_name] = action
64
+
65
+ def resolve_name(self, original: str, _seen: Optional[set] = None) -> Optional[str]:
66
+ """Return the final cell-type name for *original*, or ``None`` if deleted.
67
+
68
+ Handles transitive merges (A→B→C) and detects cycles defensively.
69
+ """
70
+ if _seen is None:
71
+ _seen = set()
72
+ if original in _seen:
73
+ # Cycle guard — fall back to original
74
+ return original
75
+ _seen.add(original)
76
+
77
+ a = self.actions.get(original)
78
+ if a is None:
79
+ return original
80
+ if a.action == "delete":
81
+ return None
82
+ if a.action == "merge":
83
+ if a.merge_target is None:
84
+ return original
85
+ return self.resolve_name(a.merge_target, _seen)
86
+ # action == "keep"
87
+ return a.final_name if a.final_name else original
88
+
89
+ def resolve(self) -> dict[str, Optional[str]]:
90
+ """Build a flat ``{original_label: final_name | None}`` mapping."""
91
+ return {name: self.resolve_name(name) for name in self.actions}
92
+
93
+ @property
94
+ def kept_names(self) -> list[str]:
95
+ """Unique final names that are not deleted."""
96
+ seen, result = set(), []
97
+ for final in self.resolve().values():
98
+ if final is not None and final not in seen:
99
+ seen.add(final)
100
+ result.append(final)
101
+ return result
102
+
103
+
104
+ # ---------------------------------------------------------------------------
105
+ # Name-suggestion heuristics
106
+ # ---------------------------------------------------------------------------
107
+
108
def suggest_name_mappings(
    data_labels: list[str],
    host_names: list[str],
) -> dict[str, Optional[str]]:
    """Suggest a Studio (host) cell-type name for each data label.

    Strategy (in priority order):
      1. Exact match (case-insensitive).
      2. Host name is a substring of the data label, or vice versa
         (case-insensitive).  The first host name that matches wins.

    Empty strings never take part in substring matching: an empty string is
    a substring of every string, so without this guard an empty label would
    be paired with an arbitrary host name (and an empty host name would
    match every label).

    Parameters
    ----------
    data_labels:
        Cell-type labels found in the imported data.
    host_names:
        Cell-type names known to the host application.  If two names differ
        only by case, the later one is used.

    Returns
    -------
    dict
        ``{data_label: host_name | None}``; ``None`` means no suggestion
        was found.

    This is deliberately simple — good enough for pre-populating the GUI.
    A future version will query a cell-type ontology / registry.
    """
    # Lower-cased host name → original spelling, for case-insensitive lookup.
    host_lower = {n.lower(): n for n in host_names}
    suggestions: dict[str, Optional[str]] = {}

    for label in data_labels:
        label_lower = label.lower()

        # 1. Exact (case-insensitive) match.
        match: Optional[str] = host_lower.get(label_lower)

        # 2. Substring match in either direction, skipping empty strings.
        if match is None and label_lower:
            match = next(
                (
                    sn
                    for sl, sn in host_lower.items()
                    if sl and (sl in label_lower or label_lower in sl)
                ),
                None,
            )

        suggestions[label] = match

    return suggestions