msiverse 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,188 @@
1
+ Metadata-Version: 2.4
2
+ Name: msiverse
3
+ Version: 0.0.1
4
+ Summary: Python-first, biologist-friendly toolkit for MALDI-MSI analysis
5
+ Author: msiverse contributors
6
+ License: BSD-3-Clause
7
+ Project-URL: Repository, https://github.com/aqgy2749/msiverse
8
+ Project-URL: Documentation, https://msiverse.readthedocs.io
9
+ Keywords: mass-spectrometry-imaging,MALDI,MSI,spatial-metabolomics,bioinformatics,scverse
10
+ Classifier: Development Status :: 2 - Pre-Alpha
11
+ Classifier: Intended Audience :: Science/Research
12
+ Classifier: License :: OSI Approved :: BSD License
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.9
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
19
+ Requires-Python: >=3.9
20
+ Description-Content-Type: text/markdown
21
+ Requires-Dist: numpy>=1.22
22
+ Requires-Dist: scipy>=1.9
23
+ Requires-Dist: pandas>=1.5
24
+ Requires-Dist: scikit-learn>=1.1
25
+ Requires-Dist: matplotlib>=3.5
26
+ Provides-Extra: imzml
27
+ Requires-Dist: pyimzML>=1.5; extra == "imzml"
28
+ Provides-Extra: scverse
29
+ Requires-Dist: anndata>=0.9; extra == "scverse"
30
+ Requires-Dist: scanpy>=1.9; extra == "scverse"
31
+ Provides-Extra: deep
32
+ Requires-Dist: torch>=2.0; extra == "deep"
33
+ Provides-Extra: gui
34
+ Requires-Dist: napari[all]>=0.4.18; extra == "gui"
35
+ Requires-Dist: magicgui>=0.7; extra == "gui"
36
+ Provides-Extra: metaspace
37
+ Requires-Dist: metaspace2020>=2.0; extra == "metaspace"
38
+ Provides-Extra: workflow
39
+ Requires-Dist: pyyaml>=6.0; extra == "workflow"
40
+ Provides-Extra: all
41
+ Requires-Dist: pyimzML>=1.5; extra == "all"
42
+ Requires-Dist: anndata>=0.9; extra == "all"
43
+ Requires-Dist: scanpy>=1.9; extra == "all"
44
+ Requires-Dist: torch>=2.0; extra == "all"
45
+ Requires-Dist: napari[all]>=0.4.18; extra == "all"
46
+ Requires-Dist: magicgui>=0.7; extra == "all"
47
+ Requires-Dist: metaspace2020>=2.0; extra == "all"
48
+ Requires-Dist: pyyaml>=6.0; extra == "all"
49
+ Provides-Extra: dev
50
+ Requires-Dist: pytest>=7.0; extra == "dev"
51
+ Requires-Dist: pytest-cov>=4.0; extra == "dev"
52
+ Requires-Dist: ruff>=0.1; extra == "dev"
53
+
54
+ # msiverse
55
+
56
+ > A Python-first, biologist-friendly toolkit for MALDI-MSI analysis.
57
+
58
+ > [!WARNING]
59
+ > **Early development release (`0.0.1`).** `msiverse` is under active
60
+ > development. APIs, outputs, and behavior may change without notice, and this
61
+ > release is not recommended for production use.
62
+
63
+ `msiverse` is a reference implementation of the architecture recommended in the
64
+ *2026 MALDI-MSI Software Landscape* report. It addresses the seven critical
65
+ gaps identified in the open-source ecosystem and demonstrates a viable path to
66
+ a "Scanpy moment" for mass spectrometry imaging.
67
+
68
+ ---
69
+
70
+ ## What it does
71
+
72
+ | Module | Purpose | Report recommendation |
73
+ |---|---|---|
74
+ | `msiverse.core` | `MSIData` container with AnnData/SpatialData interop | Rec 1 |
75
+ | `msiverse.io` | imzML reader + synthetic data generator | Rec 3 |
76
+ | `msiverse.preprocess` | TIC / RMS norm, TopHat baseline, hotspot clip, log1p | parity w/ MALDIquant/rMSIproc |
77
+ | `msiverse.segment` | k-means, spatial k-means, **Spatial Shrunken Centroids** (Cardinal port) | Rec 1 |
78
+ | `msiverse.annotate` | local DB matcher + METASPACE adapter stub | Rec 4 |
79
+ | `msiverse.register` | landmark affine + thin-plate-spline; image warping | Rec 5 |
80
+ | `msiverse.multimodal` | MSI ↔ Visium/Xenium spot aggregation, MSI ↔ IF/IHC fusion | Rec 5 + scientific frontier |
81
+ | `msiverse.visualize` | ion images, segmentation maps, overview panels | core UX |
82
+ | `msiverse.deep` | PyTorch `Dataset` + `VAEEmbedding` (pyM²aia / msiPL style) | Rec 6 |
83
+ | `msiverse.workflow` | hashed, reproducible `Pipeline` + Snakemake config export | Rec 7 |
84
+ | `msiverse.gui` | napari plugin with ion-image browser widget | Rec 2 |
85
+
86
+ ## Installation
87
+
88
+ ```bash
89
+ # Minimal install (run from the root of a source checkout)
90
+ pip install -e .
91
+
92
+ # With scverse / DL / GUI extras
93
+ pip install -e ".[scverse,deep,gui,imzml,workflow]"
94
+
95
+ # Everything
96
+ pip install -e ".[all]"
97
+ ```
98
+
99
+ ## Quick start
100
+
101
+ ```python
102
+ from msiverse import io, preprocess, segment, annotate, visualize
103
+
104
+ # Synthetic MSI for tutorials/tests — no data download required
105
+ data = io.simulate_msi(height=80, width=80, n_features=200, n_regions=4)
106
+
107
+ # One-line preprocessing (baseline → TIC norm → hotspot → log1p)
108
+ data = preprocess.standard_pipeline(data)
109
+
110
+ # Cardinal-style Spatial Shrunken Centroids — first Python port
111
+ segment.spatial_shrunken_centroids(data, n_clusters=4, shrinkage=1.5)
112
+
113
+ # Local annotation against built-in lipid/metabolite DB
114
+ hits = annotate.annotate_local(data, polarity="positive", tol_ppm=5)
115
+
116
+ # Overview panel (TIC, mean spectrum, top features, segmentation, ...)
117
+ fig = visualize.overview(data, label_key="ssc")
118
+ fig.savefig("overview.png")
119
+ ```
120
+
121
+ ## Reproducible pipelines
122
+
123
+ ```python
124
+ from msiverse.workflow import Pipeline
125
+
126
+ p = (Pipeline("my_run")
127
+ .add("baseline", preprocess.baseline_correct, window=51)
128
+ .add("normalize", preprocess.normalize, method="tic")
129
+ .add("ssc", segment.spatial_shrunken_centroids, n_clusters=5))
130
+
131
+ result = p.run(data)
132
+ p.save_provenance("run.json") # JSON record with input/output hashes
133
+ p.to_snakemake_config("Snakefile.yaml") # HPC handoff
134
+ ```
135
+
136
+ ## scverse interop
137
+
138
+ ```python
139
+ adata = data.to_anndata() # → Scanpy / Squidpy / SpatialData
140
+ data2 = MSIData.from_anndata(adata)
141
+ ```
142
+
143
+ ## Same-section MSI + spatial transcriptomics
144
+
145
+ The scientific frontier identified in the report:
146
+
147
+ ```python
148
+ from msiverse.multimodal import integrate_with_visium
149
+
150
+ # Provide fiducial landmarks from both modalities
151
+ joint = integrate_with_visium(
152
+ msi=msi_data,
153
+ visium_adata=visium_adata,
154
+ msi_landmarks=msi_pts,
155
+ visium_landmarks=visium_pts,
156
+ aggregation="mean",
157
+ )
158
+ # joint.obsm['msi'] now contains MSI intensities per Visium spot
159
+ ```
160
+
161
+ ## GUI (napari)
162
+
163
+ ```python
164
+ import napari
165
+ from msiverse.gui import view_msi
166
+
167
+ viewer = view_msi(data, label_key="ssc")
168
+ napari.run()
169
+ ```
170
+
171
+ ## Tests
172
+
173
+ ```bash
174
+ pytest tests/ -v
175
+ ```
176
+
177
+ ## License
178
+
179
+ BSD-3-Clause.
180
+
181
+ ## Citation
182
+
183
+ If you use `msiverse` in your work, please cite the underlying methods:
184
+
185
+ - Cardinal v3: Bemis et al., *Nat. Methods* 20:1883 (2023)
186
+ - METASPACE-ML: Wadie et al., *Nat. Commun.* 15:9110 (2024)
187
+ - pyM²aia: Cordes et al., *Bioinformatics* 40:btae133 (2024)
188
+ - SMA: Vicari et al., *Nat. Biotechnol.* 42:1046 (2024)
@@ -0,0 +1,135 @@
1
+ # msiverse
2
+
3
+ > A Python-first, biologist-friendly toolkit for MALDI-MSI analysis.
4
+
5
+ > [!WARNING]
6
+ > **Early development release (`0.0.1`).** `msiverse` is under active
7
+ > development. APIs, outputs, and behavior may change without notice, and this
8
+ > release is not recommended for production use.
9
+
10
+ `msiverse` is a reference implementation of the architecture recommended in the
11
+ *2026 MALDI-MSI Software Landscape* report. It addresses the seven critical
12
+ gaps identified in the open-source ecosystem and demonstrates a viable path to
13
+ a "Scanpy moment" for mass spectrometry imaging.
14
+
15
+ ---
16
+
17
+ ## What it does
18
+
19
+ | Module | Purpose | Report recommendation |
20
+ |---|---|---|
21
+ | `msiverse.core` | `MSIData` container with AnnData/SpatialData interop | Rec 1 |
22
+ | `msiverse.io` | imzML reader + synthetic data generator | Rec 3 |
23
+ | `msiverse.preprocess` | TIC / RMS norm, TopHat baseline, hotspot clip, log1p | parity w/ MALDIquant/rMSIproc |
24
+ | `msiverse.segment` | k-means, spatial k-means, **Spatial Shrunken Centroids** (Cardinal port) | Rec 1 |
25
+ | `msiverse.annotate` | local DB matcher + METASPACE adapter stub | Rec 4 |
26
+ | `msiverse.register` | landmark affine + thin-plate-spline; image warping | Rec 5 |
27
+ | `msiverse.multimodal` | MSI ↔ Visium/Xenium spot aggregation, MSI ↔ IF/IHC fusion | Rec 5 + scientific frontier |
28
+ | `msiverse.visualize` | ion images, segmentation maps, overview panels | core UX |
29
+ | `msiverse.deep` | PyTorch `Dataset` + `VAEEmbedding` (pyM²aia / msiPL style) | Rec 6 |
30
+ | `msiverse.workflow` | hashed, reproducible `Pipeline` + Snakemake config export | Rec 7 |
31
+ | `msiverse.gui` | napari plugin with ion-image browser widget | Rec 2 |
32
+
33
+ ## Installation
34
+
35
+ ```bash
36
+ # Minimal install (run from the root of a source checkout)
37
+ pip install -e .
38
+
39
+ # With scverse / DL / GUI extras
40
+ pip install -e ".[scverse,deep,gui,imzml,workflow]"
41
+
42
+ # Everything
43
+ pip install -e ".[all]"
44
+ ```
45
+
46
+ ## Quick start
47
+
48
+ ```python
49
+ from msiverse import io, preprocess, segment, annotate, visualize
50
+
51
+ # Synthetic MSI for tutorials/tests — no data download required
52
+ data = io.simulate_msi(height=80, width=80, n_features=200, n_regions=4)
53
+
54
+ # One-line preprocessing (baseline → TIC norm → hotspot → log1p)
55
+ data = preprocess.standard_pipeline(data)
56
+
57
+ # Cardinal-style Spatial Shrunken Centroids — first Python port
58
+ segment.spatial_shrunken_centroids(data, n_clusters=4, shrinkage=1.5)
59
+
60
+ # Local annotation against built-in lipid/metabolite DB
61
+ hits = annotate.annotate_local(data, polarity="positive", tol_ppm=5)
62
+
63
+ # Overview panel (TIC, mean spectrum, top features, segmentation, ...)
64
+ fig = visualize.overview(data, label_key="ssc")
65
+ fig.savefig("overview.png")
66
+ ```
67
+
68
+ ## Reproducible pipelines
69
+
70
+ ```python
71
+ from msiverse.workflow import Pipeline
72
+
73
+ p = (Pipeline("my_run")
74
+ .add("baseline", preprocess.baseline_correct, window=51)
75
+ .add("normalize", preprocess.normalize, method="tic")
76
+ .add("ssc", segment.spatial_shrunken_centroids, n_clusters=5))
77
+
78
+ result = p.run(data)
79
+ p.save_provenance("run.json") # JSON record with input/output hashes
80
+ p.to_snakemake_config("Snakefile.yaml") # HPC handoff
81
+ ```
82
+
83
+ ## scverse interop
84
+
85
+ ```python
86
+ adata = data.to_anndata() # → Scanpy / Squidpy / SpatialData
87
+ data2 = MSIData.from_anndata(adata)
88
+ ```
89
+
90
+ ## Same-section MSI + spatial transcriptomics
91
+
92
+ The scientific frontier identified in the report:
93
+
94
+ ```python
95
+ from msiverse.multimodal import integrate_with_visium
96
+
97
+ # Provide fiducial landmarks from both modalities
98
+ joint = integrate_with_visium(
99
+ msi=msi_data,
100
+ visium_adata=visium_adata,
101
+ msi_landmarks=msi_pts,
102
+ visium_landmarks=visium_pts,
103
+ aggregation="mean",
104
+ )
105
+ # joint.obsm['msi'] now contains MSI intensities per Visium spot
106
+ ```
107
+
108
+ ## GUI (napari)
109
+
110
+ ```python
111
+ import napari
112
+ from msiverse.gui import view_msi
113
+
114
+ viewer = view_msi(data, label_key="ssc")
115
+ napari.run()
116
+ ```
117
+
118
+ ## Tests
119
+
120
+ ```bash
121
+ pytest tests/ -v
122
+ ```
123
+
124
+ ## License
125
+
126
+ BSD-3-Clause.
127
+
128
+ ## Citation
129
+
130
+ If you use `msiverse` in your work, please cite the underlying methods:
131
+
132
+ - Cardinal v3: Bemis et al., *Nat. Methods* 20:1883 (2023)
133
+ - METASPACE-ML: Wadie et al., *Nat. Commun.* 15:9110 (2024)
134
+ - pyM²aia: Cordes et al., *Bioinformatics* 40:btae133 (2024)
135
+ - SMA: Vicari et al., *Nat. Biotechnol.* 42:1046 (2024)
@@ -0,0 +1,66 @@
1
+ """
2
+ msiverse: a Python-first, biologist-friendly toolkit for MALDI-MSI.
3
+
4
+ Designed to address the seven critical gaps in the MALDI-MSI software
5
+ ecosystem identified in our 2026 landscape review:
6
+
7
+ 1. End-to-end open-source Python platform
8
+ 2. (lobby) imzML 1.2 standardization — interim native handling here
9
+ 3. Vendor I/O coverage (via imzy/MSIGen adapters)
10
+ 4. Accessible deep learning (msiverse.deep)
11
+ 5. Better protein annotation (HIT-MAP-style; planned)
12
+ 6. FAIR / reproducibility (msiverse.workflow)
13
+ 7. Same-section ST + MSI integration (msiverse.multimodal)
14
+
15
+ Quick start
16
+ -----------
17
+ >>> from msiverse import io, preprocess, segment, annotate, visualize
18
+ >>> data = io.simulate_msi() # synthetic demo data
19
+ >>> data = preprocess.standard_pipeline(data) # baseline → norm → log
20
+ >>> segment.spatial_shrunken_centroids(data, n_clusters=5)
21
+ >>> annotate.annotate_local(data, tol_ppm=5)
22
+ >>> visualize.overview(data, label_key="ssc")
23
+
24
+ End-to-end (one-call)
25
+ ---------------------
26
+ >>> from msiverse.workflow import run_standard_workflow
27
+ >>> data, pipeline = run_standard_workflow(
28
+ ... data, n_clusters=5, output_dir="./results"
29
+ ... )
30
+
31
+ scverse interop
32
+ ---------------
33
+ >>> adata = data.to_anndata() # → Scanpy / Squidpy / SpatialData
34
+ >>> data2 = MSIData.from_anndata(adata)
35
+ """
36
+
37
+ __version__ = "0.0.1"
38
+
39
+ from .core import MSIData
40
+
41
+ # Submodule aliases for the canonical workflow
42
+ from . import (
43
+ io,
44
+ preprocess,
45
+ segment,
46
+ annotate,
47
+ register,
48
+ multimodal,
49
+ visualize,
50
+ workflow,
51
+ diagnostics,
52
+ )
53
+
54
+ __all__ = [
55
+ "MSIData",
56
+ "io",
57
+ "preprocess",
58
+ "segment",
59
+ "annotate",
60
+ "register",
61
+ "multimodal",
62
+ "visualize",
63
+ "workflow",
64
+ "diagnostics",
65
+ "__version__",
66
+ ]
@@ -0,0 +1,255 @@
1
+ """
2
+ msiverse.annotate
3
+ =================
4
+
5
+ Metabolite annotation for MSI data.
6
+
7
+ Strategy: provide a *thin local matcher* for offline workflows + an
8
+ optional adapter to METASPACE / METASPACE-ML for FDR-controlled cloud
9
+ annotation. Lipid in-source fragmentation (rMSIfragment-style) and
10
+ spatial coherence scoring (METASPACE's MSM) are stubbed for future work.
11
+
12
+ What works today:
13
+ - Local mass-only matching against a small built-in lipid/metabolite
14
+ reference list with common adducts.
15
+ - Adduct enumeration ([M+H]+, [M+Na]+, [M-H]-, [M+K]+, ...).
16
+ - Returns per-feature ranked candidates with mass error in ppm.
17
+
18
+ What's pluggable:
19
+ - The METASPACEClient stub mirrors the metaspace2020/python-client
20
+ API so it can be swapped in once a network is available.
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ from dataclasses import dataclass
26
+
27
+ import numpy as np
28
+ import pandas as pd
29
+
30
+ from ..core import MSIData
31
+
32
+
33
# Monoisotopic mass shifts (Da) that turn a neutral monoisotopic mass into the
# m/z of a singly charged ion. Each entry maps adduct label -> (shift, charge).
# All shifts are *ion* masses, i.e. the electron mass (~0.00055 Da) is removed
# for cations and added for anions, so every row is on the same footing.
PROTON = 1.00728
ADDUCTS = {
    "[M+H]+": (+PROTON, +1),
    "[M+Na]+": (+22.98922, +1),
    "[M+K]+": (+38.96316, +1),
    "[M+NH4]+": (+18.03383, +1),
    "[M-H]-": (-PROTON, -1),
    # 35Cl atom (34.96885) plus the captured electron (0.00055)
    "[M+Cl]-": (+34.96940, -1),
    # formate adduct: formic acid (46.00548) minus a proton (1.00728)
    "[M+FA-H]-": (+44.99820, -1),
}
44
+
45
+
46
+ # =============================================================================
47
+ # Tiny built-in database for demos / offline use
48
+ # =============================================================================
49
def builtin_db() -> pd.DataFrame:
    """
    Build the small demo reference table of metabolites and lipids.

    The table is meant for tutorials and offline tests only; it is far too
    small for real annotation work.

    Returns
    -------
    DataFrame
        One row per compound with the columns ``name``, ``formula``,
        ``neutral_mass`` (monoisotopic mass) and ``class``.
    """
    column_names = ["name", "formula", "neutral_mass", "class"]
    # Each record follows the order of `column_names`.
    records = [
        ("Glucose", "C6H12O6", 180.0634, "Sugar"),
        ("Cholesterol", "C27H46O", 386.3549, "Sterol"),
        ("Phosphatidylcholine 34:1", "C42H82NO8P", 759.5778, "Lipid/PC"),
        ("Phosphatidylcholine 36:2", "C44H84NO8P", 785.5935, "Lipid/PC"),
        ("Sphingomyelin d18:1/16:0", "C39H79N2O6P", 702.5676, "Lipid/SM"),
        ("Glutamic acid", "C5H9NO4", 147.0532, "Amino acid"),
        ("ATP", "C10H16N5O13P3", 506.9957, "Nucleotide"),
        ("Heme B", "C34H32FeN4O4", 616.1773, "Cofactor"),
        ("Taurine", "C2H7NO3S", 125.0147, "Amino acid"),
        ("Creatine", "C4H9N3O2", 131.0695, "Amino acid"),
        ("Dopamine", "C8H11NO2", 153.0790, "Neurotransmitter"),
        ("Acetylcholine", "C7H16NO2", 146.1181, "Neurotransmitter"),
        ("PE 36:2", "C41H78NO8P", 743.5465, "Lipid/PE"),
        ("LysoPC 16:0", "C24H50NO7P", 495.3325, "Lipid/LPC"),
    ]
    table = pd.DataFrame(records, columns=column_names)
    return table
74
+
75
+
76
+ # =============================================================================
77
+ # Local annotation
78
+ # =============================================================================
79
+ @dataclass
80
+ class AnnotationHit:
81
+ feature_idx: int
82
+ observed_mz: float
83
+ name: str
84
+ formula: str
85
+ adduct: str
86
+ theoretical_mz: float
87
+ ppm_error: float
88
+ db_class: str
89
+
90
+ def to_dict(self) -> dict:
91
+ return {
92
+ "feature_idx": self.feature_idx,
93
+ "observed_mz": self.observed_mz,
94
+ "name": self.name,
95
+ "formula": self.formula,
96
+ "adduct": self.adduct,
97
+ "theoretical_mz": self.theoretical_mz,
98
+ "ppm_error": self.ppm_error,
99
+ "class": self.db_class,
100
+ }
101
+
102
+
103
def annotate_local(
    data: MSIData,
    db: pd.DataFrame | None = None,
    adducts: list[str] | None = None,
    polarity: str = "positive",
    tol_ppm: float = 5.0,
    inplace: bool = True,
) -> pd.DataFrame:
    """
    Annotate features against a local DataFrame database (mass-only).

    Every m/z value in ``data.mz`` is compared with the theoretical m/z of
    every (compound, adduct) pair. A pair is a hit when its mass error,
    expressed in ppm of the theoretical m/z, is within ±tol_ppm, so every
    reported ``ppm_error`` satisfies ``abs(ppm_error) <= tol_ppm``.

    Parameters
    ----------
    data : MSIData
        Dataset whose ``mz`` values are matched. When `inplace` is true,
        its ``var`` table and ``uns`` mapping are updated.
    db : DataFrame, optional
        Columns must include {name, formula, neutral_mass, class}.
        Defaults to the built-in demo database.
    adducts : list of str, optional
        Adduct labels (keys of ``ADDUCTS``) to consider. Defaults to all
        adducts whose charge matches `polarity`.
    polarity : 'positive' or 'negative'
        Selects adducts by charge; only used when `adducts` is not given.
    tol_ppm : float
        Match tolerance in ppm.
    inplace : bool
        If true, write the best hit per feature (smallest absolute ppm
        error) into the ``annotation``, ``adduct``, ``ppm_error`` and
        ``compound_class`` columns of ``data.var`` (features without a hit
        get ``""`` / NaN) and record ``annotation_db_size`` and
        ``annotation_tol_ppm`` in ``data.uns``.

    Returns
    -------
    DataFrame
        All candidate hits with columns feature_idx, observed_mz, name,
        formula, adduct, theoretical_mz, ppm_error and class, sorted by
        feature_idx and then by absolute ppm_error.

    Raises
    ------
    ValueError
        If `adducts` is not given and `polarity` is neither 'positive'
        nor 'negative'.
    KeyError
        If an entry of `adducts` is not a key of ``ADDUCTS``.
    """
    if db is None:
        db = builtin_db()
    if adducts is None:
        # Reject typos such as "pos" instead of silently treating them as
        # negative mode.
        if polarity not in ("positive", "negative"):
            raise ValueError(
                f"polarity must be 'positive' or 'negative', got {polarity!r}"
            )
        sign = +1 if polarity == "positive" else -1
        adducts = [a for a, (_, q) in ADDUCTS.items() if q == sign]

    # Resolve the adduct shifts once; an unknown label raises KeyError here
    # rather than somewhere inside the matching loop.
    adduct_labels = list(adducts)
    shifts = np.array([ADDUCTS[label][0] for label in adduct_labels], dtype=float)

    names = db["name"].to_numpy()
    formulas = db["formula"].to_numpy()
    classes = db["class"].to_numpy()
    neutral = db["neutral_mass"].to_numpy(dtype=float)
    # Theoretical m/z of every (compound, adduct) pair: (n_compounds, n_adducts)
    theo_mz = neutral[:, None] + shifts[None, :]

    hits: list[AnnotationHit] = []
    for j, observed_mz in enumerate(data.mz):
        # Signed error in ppm of the theoretical m/z -- the same quantity
        # that is reported, so the tolerance test and the output agree.
        ppm = (observed_mz - theo_mz) / theo_mz * 1e6
        matched = np.nonzero(np.abs(ppm) <= tol_ppm)
        # np.nonzero yields row-major order: compound first, then adduct.
        for row, col in zip(*matched):
            hits.append(
                AnnotationHit(
                    feature_idx=j,
                    observed_mz=float(observed_mz),
                    name=names[row],
                    formula=formulas[row],
                    adduct=adduct_labels[col],
                    theoretical_mz=float(theo_mz[row, col]),
                    ppm_error=float(ppm[row, col]),
                    db_class=classes[row],
                )
            )

    if not hits:
        result = pd.DataFrame(
            columns=[
                "feature_idx", "observed_mz", "name", "formula", "adduct",
                "theoretical_mz", "ppm_error", "class",
            ]
        )
    else:
        result = pd.DataFrame([h.to_dict() for h in hits]).sort_values(
            ["feature_idx", "ppm_error"],
            key=lambda s: s.abs() if s.name == "ppm_error" else s,
        )

    if inplace:
        # Start from a clean slate so features without a hit are blank.
        data.var["annotation"] = ""
        data.var["adduct"] = ""
        data.var["ppm_error"] = np.nan
        data.var["compound_class"] = ""

        # `result` is ordered by feature and then |ppm_error|, so the first
        # row of each feature is its best hit.
        best = result.drop_duplicates("feature_idx", keep="first")
        if not best.empty:
            # (column in data.var, column in the hit table)
            targets = [
                (data.var.columns.get_loc("annotation"), "name"),
                (data.var.columns.get_loc("adduct"), "adduct"),
                (data.var.columns.get_loc("ppm_error"), "ppm_error"),
                (data.var.columns.get_loc("compound_class"), "class"),
            ]
            for _, h in best.iterrows():
                idx = int(h["feature_idx"])
                for col_pos, source in targets:
                    data.var.iloc[idx, col_pos] = h[source]
        data.uns["annotation_db_size"] = len(db)
        data.uns["annotation_tol_ppm"] = tol_ppm

    return result
197
+
198
+
199
+ # =============================================================================
200
+ # METASPACE cloud adapter (stub; real call requires network + token)
201
+ # =============================================================================
202
class METASPACEClient:
    """
    Thin adapter to the METASPACE platform.

    On a connected machine, install `metaspace2020` and pass an API token:

    >>> client = METASPACEClient(api_key="your_token")
    >>> ds_id = client.submit(imzml_path, metadata)
    >>> hits = client.get_annotations(ds_id, fdr=0.1)

    This class deliberately wraps but does not reimplement the
    metaspace2020 client, to stay in sync with their API. If the package
    is not installed the client can still be constructed; `available` is
    then false and every remote call raises RuntimeError.
    """

    def __init__(self, api_key: str | None = None, host: str | None = None) -> None:
        """
        Create the underlying ``SMInstance`` if metaspace2020 is importable.

        Parameters
        ----------
        api_key : str, optional
            API token forwarded to ``SMInstance`` when given.
        host : str, optional
            Server URL forwarded to ``SMInstance`` when given.
        """
        try:
            from metaspace import SMInstance
        except ImportError:
            # Stay constructible offline; remote calls report this reason.
            self._sm = None
            self._unavailable_reason = (
                "metaspace2020 package not installed. "
                "Run `pip install metaspace2020`."
            )
            return
        # Only forward what the caller set so SMInstance keeps its defaults.
        kwargs = {}
        if api_key:
            kwargs["api_key"] = api_key
        if host:
            kwargs["host"] = host
        self._sm = SMInstance(**kwargs)
        self._unavailable_reason = None

    @property
    def available(self) -> bool:
        """Whether the metaspace2020 client was created successfully."""
        return self._sm is not None

    def get_annotations(
        self,
        dataset_id: str,
        fdr: float = 0.1,
        database: str = "HMDB-v4",
    ) -> pd.DataFrame:
        """
        Fetch FDR-controlled annotations for an existing dataset.

        Raises
        ------
        RuntimeError
            If the metaspace2020 package is not installed.
        """
        if not self.available:
            raise RuntimeError(self._unavailable_reason)
        ds = self._sm.dataset(id=dataset_id)
        ann = ds.annotations(fdr=fdr, database=database)
        return pd.DataFrame(ann)

    def submit(
        self,
        imzml_path: str,
        metadata: dict,
        ibd_path: str | None = None,
        name: str | None = None,
        is_public: bool = False,
        **kwargs,
    ) -> str:
        """
        Submit a new dataset to METASPACE for annotation.

        Parameters
        ----------
        imzml_path : str
            Path to the ``.imzML`` file.
        metadata : dict
            Dataset metadata, passed through unchanged.
        ibd_path : str, optional
            Path to the binary ``.ibd`` file. Defaults to `imzml_path`
            with its extension replaced by ``.ibd``.
        name : str, optional
            Dataset name. Defaults to the imzML file name without
            extension.
        is_public : bool
            Whether the dataset is published publicly. Defaults to
            private.
        **kwargs
            Forwarded unchanged to ``SMInstance.submit_dataset``.

        Returns
        -------
        str
            Whatever ``SMInstance.submit_dataset`` returns (the dataset id).

        Raises
        ------
        RuntimeError
            If the metaspace2020 package is not installed.
        """
        import os

        if not self.available:
            raise RuntimeError(self._unavailable_reason)
        # NOTE(review): submit_dataset is understood to require the ibd
        # path, a name and is_public positionally -- confirm against the
        # installed metaspace2020 version.
        stem, _ext = os.path.splitext(imzml_path)
        if ibd_path is None:
            ibd_path = stem + ".ibd"
        if name is None:
            name = os.path.basename(stem)
        return self._sm.submit_dataset(
            imzml_path, ibd_path, name, metadata, is_public, **kwargs
        )