chatspatial-1.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chatspatial/__init__.py +11 -0
- chatspatial/__main__.py +141 -0
- chatspatial/cli/__init__.py +7 -0
- chatspatial/config.py +53 -0
- chatspatial/models/__init__.py +85 -0
- chatspatial/models/analysis.py +513 -0
- chatspatial/models/data.py +2462 -0
- chatspatial/server.py +1763 -0
- chatspatial/spatial_mcp_adapter.py +720 -0
- chatspatial/tools/__init__.py +3 -0
- chatspatial/tools/annotation.py +1903 -0
- chatspatial/tools/cell_communication.py +1603 -0
- chatspatial/tools/cnv_analysis.py +605 -0
- chatspatial/tools/condition_comparison.py +595 -0
- chatspatial/tools/deconvolution/__init__.py +402 -0
- chatspatial/tools/deconvolution/base.py +318 -0
- chatspatial/tools/deconvolution/card.py +244 -0
- chatspatial/tools/deconvolution/cell2location.py +326 -0
- chatspatial/tools/deconvolution/destvi.py +144 -0
- chatspatial/tools/deconvolution/flashdeconv.py +101 -0
- chatspatial/tools/deconvolution/rctd.py +317 -0
- chatspatial/tools/deconvolution/spotlight.py +216 -0
- chatspatial/tools/deconvolution/stereoscope.py +109 -0
- chatspatial/tools/deconvolution/tangram.py +135 -0
- chatspatial/tools/differential.py +625 -0
- chatspatial/tools/embeddings.py +298 -0
- chatspatial/tools/enrichment.py +1863 -0
- chatspatial/tools/integration.py +807 -0
- chatspatial/tools/preprocessing.py +723 -0
- chatspatial/tools/spatial_domains.py +808 -0
- chatspatial/tools/spatial_genes.py +836 -0
- chatspatial/tools/spatial_registration.py +441 -0
- chatspatial/tools/spatial_statistics.py +1476 -0
- chatspatial/tools/trajectory.py +495 -0
- chatspatial/tools/velocity.py +405 -0
- chatspatial/tools/visualization/__init__.py +155 -0
- chatspatial/tools/visualization/basic.py +393 -0
- chatspatial/tools/visualization/cell_comm.py +699 -0
- chatspatial/tools/visualization/cnv.py +320 -0
- chatspatial/tools/visualization/core.py +684 -0
- chatspatial/tools/visualization/deconvolution.py +852 -0
- chatspatial/tools/visualization/enrichment.py +660 -0
- chatspatial/tools/visualization/integration.py +205 -0
- chatspatial/tools/visualization/main.py +164 -0
- chatspatial/tools/visualization/multi_gene.py +739 -0
- chatspatial/tools/visualization/persistence.py +335 -0
- chatspatial/tools/visualization/spatial_stats.py +469 -0
- chatspatial/tools/visualization/trajectory.py +639 -0
- chatspatial/tools/visualization/velocity.py +411 -0
- chatspatial/utils/__init__.py +115 -0
- chatspatial/utils/adata_utils.py +1372 -0
- chatspatial/utils/compute.py +327 -0
- chatspatial/utils/data_loader.py +499 -0
- chatspatial/utils/dependency_manager.py +462 -0
- chatspatial/utils/device_utils.py +165 -0
- chatspatial/utils/exceptions.py +185 -0
- chatspatial/utils/image_utils.py +267 -0
- chatspatial/utils/mcp_utils.py +137 -0
- chatspatial/utils/path_utils.py +243 -0
- chatspatial/utils/persistence.py +78 -0
- chatspatial/utils/scipy_compat.py +143 -0
- chatspatial-1.1.0.dist-info/METADATA +242 -0
- chatspatial-1.1.0.dist-info/RECORD +67 -0
- chatspatial-1.1.0.dist-info/WHEEL +5 -0
- chatspatial-1.1.0.dist-info/entry_points.txt +2 -0
- chatspatial-1.1.0.dist-info/licenses/LICENSE +21 -0
- chatspatial-1.1.0.dist-info/top_level.txt +1 -0
chatspatial/server.py
ADDED
@@ -0,0 +1,1763 @@
```python
"""
Main server implementation for ChatSpatial using the Spatial MCP Adapter.
"""

import os
import sys
import warnings
from typing import Any, Optional, Union

# Suppress warnings to speed up startup
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning)

# CRITICAL: Disable progress bars to prevent stdout pollution
# This protects against accidental stdout usage if server is imported directly
os.environ["TQDM_DISABLE"] = "1"

# Suppress scanpy/squidpy verbosity
try:
    import scanpy as sc

    sc.settings.verbosity = 0
except ImportError:
    pass

from mcp.server.fastmcp import Context  # noqa: E402
from mcp.types import ImageContent  # noqa: E402

from .models.analysis import AnnotationResult  # noqa: E402
from .models.analysis import CellCommunicationResult  # noqa: E402
from .models.analysis import CNVResult  # noqa: E402
from .models.analysis import ConditionComparisonResult  # noqa: E402
from .models.analysis import DeconvolutionResult  # noqa: E402
from .models.analysis import DifferentialExpressionResult  # noqa: E402
from .models.analysis import EnrichmentResult  # noqa: E402
from .models.analysis import IntegrationResult  # noqa: E402
from .models.analysis import PreprocessingResult  # noqa: E402
from .models.analysis import RNAVelocityResult  # noqa: E402
from .models.analysis import SpatialDomainResult  # noqa: E402
from .models.analysis import SpatialStatisticsResult  # noqa: E402
from .models.analysis import SpatialVariableGenesResult  # noqa: E402
from .models.analysis import TrajectoryResult  # noqa: E402
from .models.data import AnnotationParameters  # noqa: E402
from .models.data import CellCommunicationParameters  # noqa: E402
from .models.data import CNVParameters  # noqa: E402
from .models.data import ColumnInfo  # noqa: E402
from .models.data import ConditionComparisonParameters  # noqa: E402
from .models.data import DeconvolutionParameters  # noqa: E402
from .models.data import DifferentialExpressionParameters  # noqa: E402
from .models.data import EnrichmentParameters  # noqa: E402
from .models.data import IntegrationParameters  # noqa: E402
from .models.data import PreprocessingParameters  # noqa: E402
from .models.data import RNAVelocityParameters  # noqa: E402
from .models.data import SpatialDataset  # noqa: E402
from .models.data import SpatialDomainParameters  # noqa: E402
from .models.data import SpatialStatisticsParameters  # noqa: E402
from .models.data import SpatialVariableGenesParameters  # noqa: E402
from .models.data import TrajectoryParameters  # noqa: E402
from .models.data import VisualizationParameters  # noqa: E402
from .spatial_mcp_adapter import ToolContext  # noqa: E402
from .spatial_mcp_adapter import create_spatial_mcp_server  # noqa: E402
from .spatial_mcp_adapter import get_tool_annotations  # noqa: E402
from .utils.exceptions import DataNotFoundError  # noqa: E402
from .utils.mcp_utils import mcp_tool_error_handler  # noqa: E402

# Create MCP server and adapter
mcp, adapter = create_spatial_mcp_server("ChatSpatial")

# Get data manager and visualization registry from adapter
# These module-level aliases provide consistent access patterns
data_manager = adapter.data_manager
visualization_registry = adapter.visualization_registry


def validate_dataset(data_id: str) -> None:
    """Validate that a dataset exists in the data store

    Args:
        data_id: Dataset ID

    Raises:
        ValueError: If the dataset is not found
    """
    if not data_manager.dataset_exists(data_id):
        raise DataNotFoundError(f"Dataset {data_id} not found")


@mcp.tool(annotations=get_tool_annotations("load_data"))
@mcp_tool_error_handler()
async def load_data(
    data_path: str,
    data_type: str = "auto",
    name: Optional[str] = None,
    context: Optional[Context] = None,
) -> SpatialDataset:
    """Load spatial transcriptomics data with comprehensive metadata profile

    Returns detailed information about the dataset structure to help with analysis:
    - Cell and gene counts
    - Available metadata columns with types and sample values
    - Multi-dimensional data (spatial coordinates, dimensionality reduction, etc.)
    - Gene expression profiles

    Args:
        data_path: Path to the data file or directory
        data_type: Type of spatial data (auto, 10x_visium, slide_seq, merfish, seqfish, other, h5ad).
            If 'auto', will try to determine the type from the file extension or directory structure.
        name: Optional name for the dataset

    Returns:
        Comprehensive dataset information including metadata profiles
    """
    # Create ToolContext for consistent logging
    ctx = ToolContext(_data_manager=data_manager, _mcp_context=context)

    await ctx.info(f"Loading data from {data_path} (type: {data_type})")

    # Load data using data manager
    data_id = await data_manager.load_dataset(data_path, data_type, name)
    dataset_info = await data_manager.get_dataset(data_id)

    await ctx.info(
        f"Successfully loaded {dataset_info['type']} data with "
        f"{dataset_info['n_cells']} cells and {dataset_info['n_genes']} genes"
    )

    # Convert column info from dict to ColumnInfo objects
    obs_columns = (
        [ColumnInfo(**col) for col in dataset_info.get("obs_columns", [])]
        if dataset_info.get("obs_columns")
        else None
    )
    var_columns = (
        [ColumnInfo(**col) for col in dataset_info.get("var_columns", [])]
        if dataset_info.get("var_columns")
        else None
    )

    # Return comprehensive dataset information
    return SpatialDataset(
        id=data_id,
        name=dataset_info["name"],
        data_type=dataset_info["type"],  # Use normalized type from dataset_info
        description=f"Spatial data: {dataset_info['n_cells']} cells × {dataset_info['n_genes']} genes",
        n_cells=dataset_info["n_cells"],
        n_genes=dataset_info["n_genes"],
        spatial_coordinates_available=dataset_info["spatial_coordinates_available"],
        tissue_image_available=dataset_info["tissue_image_available"],
        obs_columns=obs_columns,
        var_columns=var_columns,
        obsm_keys=dataset_info.get("obsm_keys"),
        uns_keys=dataset_info.get("uns_keys"),
        top_highly_variable_genes=dataset_info.get("top_highly_variable_genes"),
        top_expressed_genes=dataset_info.get("top_expressed_genes"),
    )
```
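A minimal client-side sketch of how the tools registered above can be driven over MCP stdio. It assumes the wheel's `__main__.py` starts this server when invoked as `python -m chatspatial` (the package ships a `__main__.py`, but the exact entry point is an assumption), and the data path is a placeholder; the client classes come from the same `mcp` SDK the server imports.

```python
# Usage sketch: calling the load_data tool from an MCP stdio client.
import asyncio

from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client


async def main() -> None:
    # Assumption: `python -m chatspatial` launches the stdio server defined above.
    server = StdioServerParameters(command="python", args=["-m", "chatspatial"])
    async with stdio_client(server) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            # Arguments mirror the load_data signature: data_path, data_type, name.
            result = await session.call_tool(
                "load_data",
                arguments={"data_path": "./visium_sample", "data_type": "10x_visium"},
            )
            print(result.content)


asyncio.run(main())
```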
@mcp.tool(annotations=get_tool_annotations("preprocess_data"))
|
|
159
|
+
@mcp_tool_error_handler()
|
|
160
|
+
async def preprocess_data(
|
|
161
|
+
data_id: str,
|
|
162
|
+
params: PreprocessingParameters = PreprocessingParameters(),
|
|
163
|
+
context: Optional[Context] = None,
|
|
164
|
+
) -> PreprocessingResult:
|
|
165
|
+
"""Preprocess spatial transcriptomics data
|
|
166
|
+
|
|
167
|
+
Args:
|
|
168
|
+
data_id: Dataset ID
|
|
169
|
+
params: Preprocessing parameters
|
|
170
|
+
|
|
171
|
+
Returns:
|
|
172
|
+
Preprocessing result
|
|
173
|
+
|
|
174
|
+
Notes:
|
|
175
|
+
Available normalization methods:
|
|
176
|
+
- log: Standard log normalization (default)
|
|
177
|
+
- sct: SCTransform v2 variance-stabilizing normalization (requires pysctransform)
|
|
178
|
+
Install: pip install 'chatspatial[sct]'
|
|
179
|
+
Best for raw UMI counts from 10x platforms (Visium, etc.)
|
|
180
|
+
Based on regularized negative binomial regression (Hafemeister & Satija 2019)
|
|
181
|
+
- pearson_residuals: Analytic Pearson residuals (built-in, similar to SCTransform)
|
|
182
|
+
Faster than SCTransform with comparable results for most analyses
|
|
183
|
+
- none: No normalization
|
|
184
|
+
- scvi: Use scVI for normalization and dimensionality reduction
|
|
185
|
+
|
|
186
|
+
SCTransform-specific parameters (only used when normalization='sct'):
|
|
187
|
+
- sct_method: 'fix-slope' (v2, default) or 'offset' (v1)
|
|
188
|
+
- sct_var_features_n: Number of variable features (default: 3000)
|
|
189
|
+
- sct_exclude_poisson: Exclude Poisson genes from regularization (default: True)
|
|
190
|
+
- sct_n_cells: Number of cells for parameter estimation (default: 5000)
|
|
191
|
+
|
|
192
|
+
When use_scvi_preprocessing=True, scVI will be used for advanced preprocessing
|
|
193
|
+
including denoising and batch effect correction.
|
|
194
|
+
|
|
195
|
+
Advanced configuration options:
|
|
196
|
+
- n_neighbors: Number of neighbors for graph construction (default: 15)
|
|
197
|
+
- clustering_resolution: Leiden clustering resolution (default: 1.0)
|
|
198
|
+
- clustering_key: Key name for storing clustering results (default: "leiden")
|
|
199
|
+
- spatial_key: Key name for spatial coordinates in obsm (default: None, auto-detected)
|
|
200
|
+
- batch_key: Key name for batch information in obs (default: "batch")
|
|
201
|
+
|
|
202
|
+
IMPORTANT: This preprocessing creates a filtered gene set for analysis efficiency.
|
|
203
|
+
Raw data is automatically preserved in adata.raw for downstream analyses requiring
|
|
204
|
+
comprehensive gene coverage (e.g., cell communication analysis with LIANA+).
|
|
205
|
+
|
|
206
|
+
Cell communication analysis automatically uses adata.raw when available.
|
|
207
|
+
"""
|
|
208
|
+
# Validate dataset
|
|
209
|
+
validate_dataset(data_id)
|
|
210
|
+
|
|
211
|
+
# Create ToolContext
|
|
212
|
+
ctx = ToolContext(_data_manager=data_manager, _mcp_context=context)
|
|
213
|
+
|
|
214
|
+
# Lazy import (avoid name conflict with MCP tool)
|
|
215
|
+
from .tools.preprocessing import preprocess_data as preprocess_func
|
|
216
|
+
|
|
217
|
+
# Call preprocessing function
|
|
218
|
+
result = await preprocess_func(data_id, ctx, params)
|
|
219
|
+
|
|
220
|
+
# Note: No writeback needed - adata modifications are in-place on the same object
|
|
221
|
+
|
|
222
|
+
# Save preprocessing result
|
|
223
|
+
await data_manager.save_result(data_id, "preprocessing", result)
|
|
224
|
+
|
|
225
|
+
return result
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
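For illustration, a hedged sketch of what a preprocess_data call could look like from an already-initialized ClientSession (see the sketch after load_data). The `normalization`, `n_neighbors`, and `clustering_resolution` field names come from the docstring above; any other PreprocessingParameters fields would be assumptions.

```python
from mcp import ClientSession


async def preprocess(session: ClientSession, data_id: str) -> None:
    # Sketch of a preprocess_data invocation; params is validated server-side
    # against PreprocessingParameters (field names are taken from the docstring).
    result = await session.call_tool(
        "preprocess_data",
        arguments={
            "data_id": data_id,
            "params": {
                "normalization": "sct",        # SCTransform v2 (requires pysctransform)
                "n_neighbors": 15,             # neighbors for graph construction
                "clustering_resolution": 1.0,  # Leiden resolution
            },
        },
    )
    print(result.content)
```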
@mcp.tool(annotations=get_tool_annotations("compute_embeddings"))
|
|
229
|
+
@mcp_tool_error_handler()
|
|
230
|
+
async def compute_embeddings(
|
|
231
|
+
data_id: str,
|
|
232
|
+
compute_pca: bool = True,
|
|
233
|
+
compute_neighbors: bool = True,
|
|
234
|
+
compute_umap: bool = True,
|
|
235
|
+
compute_clustering: bool = True,
|
|
236
|
+
compute_diffmap: bool = False,
|
|
237
|
+
compute_spatial_neighbors: bool = True,
|
|
238
|
+
n_pcs: int = 30,
|
|
239
|
+
n_neighbors: int = 15,
|
|
240
|
+
clustering_resolution: float = 1.0,
|
|
241
|
+
clustering_method: str = "leiden",
|
|
242
|
+
force: bool = False,
|
|
243
|
+
context: Optional[Context] = None,
|
|
244
|
+
) -> dict[str, Any]:
|
|
245
|
+
"""Compute dimensionality reduction, clustering, and neighbor graphs.
|
|
246
|
+
|
|
247
|
+
This tool provides explicit control over embedding computations.
|
|
248
|
+
Analysis tools compute these lazily on-demand, but you can use this tool to:
|
|
249
|
+
- Control computation parameters (n_pcs, n_neighbors, resolution)
|
|
250
|
+
- Force recomputation with different parameters
|
|
251
|
+
- Compute specific embeddings independently
|
|
252
|
+
|
|
253
|
+
Args:
|
|
254
|
+
data_id: Dataset ID
|
|
255
|
+
compute_pca: Compute PCA dimensionality reduction
|
|
256
|
+
compute_neighbors: Compute k-NN neighbor graph
|
|
257
|
+
compute_umap: Compute UMAP embedding
|
|
258
|
+
compute_clustering: Compute Leiden/Louvain clustering
|
|
259
|
+
compute_diffmap: Compute diffusion map for trajectory analysis
|
|
260
|
+
compute_spatial_neighbors: Compute spatial neighborhood graph
|
|
261
|
+
n_pcs: Number of principal components (default: 30)
|
|
262
|
+
n_neighbors: Number of neighbors for k-NN graph (default: 15)
|
|
263
|
+
clustering_resolution: Clustering resolution (default: 1.0)
|
|
264
|
+
clustering_method: Clustering algorithm ('leiden' or 'louvain')
|
|
265
|
+
force: Force recomputation even if results already exist
|
|
266
|
+
|
|
267
|
+
Returns:
|
|
268
|
+
Summary of computed embeddings
|
|
269
|
+
"""
|
|
270
|
+
# Validate dataset
|
|
271
|
+
validate_dataset(data_id)
|
|
272
|
+
|
|
273
|
+
# Lazy import
|
|
274
|
+
from .tools.embeddings import EmbeddingParameters
|
|
275
|
+
from .tools.embeddings import compute_embeddings as compute_embeddings_func
|
|
276
|
+
|
|
277
|
+
# Create parameters
|
|
278
|
+
params = EmbeddingParameters(
|
|
279
|
+
compute_pca=compute_pca,
|
|
280
|
+
compute_neighbors=compute_neighbors,
|
|
281
|
+
compute_umap=compute_umap,
|
|
282
|
+
compute_clustering=compute_clustering,
|
|
283
|
+
compute_diffmap=compute_diffmap,
|
|
284
|
+
compute_spatial_neighbors=compute_spatial_neighbors,
|
|
285
|
+
n_pcs=n_pcs,
|
|
286
|
+
n_neighbors=n_neighbors,
|
|
287
|
+
clustering_resolution=clustering_resolution,
|
|
288
|
+
clustering_method=clustering_method,
|
|
289
|
+
force=force,
|
|
290
|
+
)
|
|
291
|
+
|
|
292
|
+
# Create ToolContext
|
|
293
|
+
ctx = ToolContext(_data_manager=data_manager, _mcp_context=context)
|
|
294
|
+
|
|
295
|
+
# Call function
|
|
296
|
+
result = await compute_embeddings_func(data_id, ctx, params)
|
|
297
|
+
|
|
298
|
+
return result.model_dump()
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
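compute_embeddings takes flat scalar arguments rather than a params model, so a call simply echoes the signature above. For example, forcing a re-clustering at a lower resolution (sketch, assuming an initialized ClientSession as in the earlier example):

```python
from mcp import ClientSession


async def recluster(session: ClientSession, data_id: str) -> None:
    # Force recomputation of the k-NN graph and Leiden clustering with new settings.
    result = await session.call_tool(
        "compute_embeddings",
        arguments={
            "data_id": data_id,
            "n_pcs": 50,
            "n_neighbors": 30,
            "clustering_resolution": 0.5,
            "clustering_method": "leiden",
            "force": True,
        },
    )
    print(result.content)
```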
@mcp.tool(annotations=get_tool_annotations("visualize_data"))
|
|
302
|
+
@mcp_tool_error_handler() # Handles type-aware error formatting for Image/str returns
|
|
303
|
+
async def visualize_data(
|
|
304
|
+
data_id: str,
|
|
305
|
+
params: VisualizationParameters = VisualizationParameters(),
|
|
306
|
+
context: Optional[Context] = None,
|
|
307
|
+
) -> Union[
|
|
308
|
+
ImageContent, str
|
|
309
|
+
]: # Simplified: ImageContent or str (MCP 2025 best practice)
|
|
310
|
+
"""Visualize spatial transcriptomics data
|
|
311
|
+
|
|
312
|
+
Args:
|
|
313
|
+
data_id: Dataset ID
|
|
314
|
+
params: Visualization parameters including:
|
|
315
|
+
- plot_type: Type of visualization. Available types:
|
|
316
|
+
* Basic plots: spatial, heatmap, violin, umap, dotplot
|
|
317
|
+
* Analysis results: cell_communication, deconvolution,
|
|
318
|
+
trajectory, rna_velocity, spatial_statistics
|
|
319
|
+
* Multi-gene/correlation: multi_gene, lr_pairs, gene_correlation
|
|
320
|
+
* Enrichment: pathway_enrichment (use subtype for spatial EnrichMap)
|
|
321
|
+
* Integration/QC: spatial_interaction, batch_integration
|
|
322
|
+
* CNV analysis: cnv_heatmap, spatial_cnv
|
|
323
|
+
* High-resolution: card_imputation
|
|
324
|
+
- feature: Gene or feature to visualize (single/multiple genes). For cell types,
|
|
325
|
+
use method-specific columns: 'cell_type_tangram', 'cell_type_scanvi',
|
|
326
|
+
'cell_type_cellassign', or clustering: 'leiden', 'louvain'.
|
|
327
|
+
For spatial domains: use the domain_key returned by identify_spatial_domains
|
|
328
|
+
(e.g., 'spatial_domains_spagcn', 'spatial_domains_leiden')
|
|
329
|
+
- cluster_key: Column in adata.obs for grouping (e.g., 'leiden', 'cell_type').
|
|
330
|
+
REQUIRED for heatmap, violin, and dotplot
|
|
331
|
+
- subtype: Visualization variant. Required for certain plot_types:
|
|
332
|
+
* deconvolution: 'spatial_multi', 'dominant_type', 'diversity', 'stacked_bar', 'scatterpie', 'umap'
|
|
333
|
+
* spatial_statistics: 'neighborhood', 'co_occurrence', 'ripley', 'moran', 'centrality', 'getis_ord'
|
|
334
|
+
* pathway_enrichment: 'barplot', 'dotplot', 'spatial_score', 'spatial_correlogram'
|
|
335
|
+
- deconv_method: Deconvolution method ('cell2location', 'rctd', etc.).
|
|
336
|
+
Auto-selected if only one result exists
|
|
337
|
+
- batch_key: Column for batch/sample identifier (default: 'batch'). Required for batch_integration
|
|
338
|
+
- colormap: Color scheme (default: 'coolwarm')
|
|
339
|
+
- figure_size: Tuple (width, height) in inches. Auto-determined if None
|
|
340
|
+
- dpi: Image resolution (default: 300, publication quality)
|
|
341
|
+
- spot_size: Spot size for spatial plots (default: 150). Adjust for density: dense data 100-150, sparse 150-200
|
|
342
|
+
- alpha_img: Background tissue image opacity (default: 0.3). Lower = dimmer background
|
|
343
|
+
- n_cell_types: Number of top cell types in deconvolution (default: 4, max: 10)
|
|
344
|
+
- lr_pairs: List of (ligand, receptor) tuples for lr_pairs plot_type
|
|
345
|
+
|
|
346
|
+
Returns:
|
|
347
|
+
Visualization image
|
|
348
|
+
|
|
349
|
+
Examples:
|
|
350
|
+
# Basic spatial plot
|
|
351
|
+
{"plot_type": "spatial", "feature": "Cd7", "colormap": "viridis"}
|
|
352
|
+
|
|
353
|
+
# Cell type visualization
|
|
354
|
+
{"plot_type": "spatial", "feature": "cell_type_tangram", "colormap": "tab20",
|
|
355
|
+
"spot_size": 150, "alpha_img": 0.3}
|
|
356
|
+
|
|
357
|
+
# Violin plot (cluster_key required)
|
|
358
|
+
{"plot_type": "violin", "feature": ["Cd7", "Cd3d"], "cluster_key": "leiden"}
|
|
359
|
+
|
|
360
|
+
# Heatmap (cluster_key required)
|
|
361
|
+
{"plot_type": "heatmap", "feature": ["Cd7", "Cd3d"], "cluster_key": "cell_type"}
|
|
362
|
+
|
|
363
|
+
# Dotplot - marker gene expression (cluster_key required)
|
|
364
|
+
{"plot_type": "dotplot", "feature": ["Cd3d", "Cd4", "Cd8a", "Cd19"],
|
|
365
|
+
"cluster_key": "cell_type", "colormap": "Reds"}
|
|
366
|
+
|
|
367
|
+
# Spatial domains (use domain_key from identify_spatial_domains result)
|
|
368
|
+
{"plot_type": "spatial", "feature": "spatial_domains_spagcn", "colormap": "tab20"}
|
|
369
|
+
|
|
370
|
+
# Deconvolution results
|
|
371
|
+
{"plot_type": "deconvolution", "subtype": "dominant_type", "deconv_method": "cell2location",
|
|
372
|
+
"n_cell_types": 6}
|
|
373
|
+
|
|
374
|
+
# Spatial statistics
|
|
375
|
+
{"plot_type": "spatial_statistics", "subtype": "neighborhood", "cluster_key": "leiden"}
|
|
376
|
+
|
|
377
|
+
# Ligand-receptor pairs
|
|
378
|
+
{"plot_type": "lr_pairs", "lr_pairs": [("Fn1", "Cd79a"), ("Vegfa", "Nrp2")]}
|
|
379
|
+
|
|
380
|
+
# Batch integration QC
|
|
381
|
+
{"plot_type": "batch_integration", "batch_key": "sample_id"}
|
|
382
|
+
"""
|
|
383
|
+
# Import to avoid name conflict
|
|
384
|
+
from .tools.visualization import visualize_data as visualize_func
|
|
385
|
+
|
|
386
|
+
# Validate dataset
|
|
387
|
+
validate_dataset(data_id)
|
|
388
|
+
|
|
389
|
+
# Create ToolContext for clean data access
|
|
390
|
+
ctx = ToolContext(
|
|
391
|
+
_data_manager=data_manager,
|
|
392
|
+
_mcp_context=context,
|
|
393
|
+
_visualization_registry=visualization_registry,
|
|
394
|
+
)
|
|
395
|
+
|
|
396
|
+
# Parameter validation is handled by Pydantic model
|
|
397
|
+
# params is already a validated VisualizationParameters instance
|
|
398
|
+
|
|
399
|
+
# Call visualization function with ToolContext
|
|
400
|
+
image = await visualize_func(data_id, ctx, params)
|
|
401
|
+
|
|
402
|
+
# Store visualization params and return the image
|
|
403
|
+
if image is not None:
|
|
404
|
+
# Generate cache key with subtype if applicable
|
|
405
|
+
# This handles plot types with subtypes (e.g., deconvolution, spatial_statistics)
|
|
406
|
+
subtype = params.subtype # Optional field with default None
|
|
407
|
+
|
|
408
|
+
if subtype:
|
|
409
|
+
cache_key = f"{data_id}_{params.plot_type}_{subtype}"
|
|
410
|
+
else:
|
|
411
|
+
cache_key = f"{data_id}_{params.plot_type}"
|
|
412
|
+
|
|
413
|
+
# Handle two return types: str (large images) or ImageContent (small images)
|
|
414
|
+
# Extract file_path if image is saved to disk
|
|
415
|
+
file_path = None
|
|
416
|
+
# Large image: file path returned as text (MCP 2025 best practice)
|
|
417
|
+
# Extract path from message (format: "Visualization saved: <path>\n...")
|
|
418
|
+
if isinstance(image, str) and "Visualization saved:" in image:
|
|
419
|
+
file_path = image.split("\n")[0].replace("Visualization saved: ", "")
|
|
420
|
+
|
|
421
|
+
# Store visualization params in registry (for regeneration on demand)
|
|
422
|
+
ctx.store_visualization(cache_key, params, file_path)
|
|
423
|
+
|
|
424
|
+
await ctx.info(
|
|
425
|
+
f"Visualization type: {params.plot_type}, feature: {params.feature or 'N/A'}"
|
|
426
|
+
)
|
|
427
|
+
|
|
428
|
+
return image
|
|
429
|
+
|
|
430
|
+
else:
|
|
431
|
+
# Return error message if no image was generated
|
|
432
|
+
return "Visualization generation failed, please check the data and parameter settings."
|
|
433
|
+
|
|
434
|
+
|
|
435
|
+
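The parameter dictionaries shown in the docstring's Examples are what gets passed as the `params` argument of the tool call. A sketch of the violin-plot example, again assuming an initialized ClientSession:

```python
from mcp import ClientSession


async def plot_violin(session: ClientSession, data_id: str) -> None:
    # The params dict is validated server-side as VisualizationParameters.
    result = await session.call_tool(
        "visualize_data",
        arguments={
            "data_id": data_id,
            "params": {
                "plot_type": "violin",
                "feature": ["Cd7", "Cd3d"],
                "cluster_key": "leiden",  # required for violin/heatmap/dotplot
            },
        },
    )
    # Returns ImageContent for small images, or a "Visualization saved: <path>" string.
    print(result.content)
```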
@mcp.tool(annotations=get_tool_annotations("save_visualization"))
|
|
436
|
+
@mcp_tool_error_handler()
|
|
437
|
+
async def save_visualization(
|
|
438
|
+
data_id: str,
|
|
439
|
+
plot_type: str,
|
|
440
|
+
subtype: Optional[str] = None,
|
|
441
|
+
output_dir: str = "./outputs",
|
|
442
|
+
filename: Optional[str] = None,
|
|
443
|
+
format: str = "png",
|
|
444
|
+
dpi: Optional[int] = None,
|
|
445
|
+
context: Optional[Context] = None,
|
|
446
|
+
) -> str:
|
|
447
|
+
"""Save a visualization to disk at publication quality
|
|
448
|
+
|
|
449
|
+
This function regenerates visualizations from stored metadata and the original
|
|
450
|
+
data, then exports at the requested quality. This secure approach avoids
|
|
451
|
+
unsafe pickle deserialization.
|
|
452
|
+
|
|
453
|
+
Args:
|
|
454
|
+
data_id: Dataset ID
|
|
455
|
+
plot_type: Type of plot to save (e.g., 'spatial', 'umap', 'deconvolution', 'spatial_statistics')
|
|
456
|
+
subtype: Optional subtype for plot types with variants (e.g., 'neighborhood', 'scatterpie')
|
|
457
|
+
- For pathway_enrichment: 'enrichment_plot', 'barplot', 'dotplot', 'spatial'
|
|
458
|
+
- For deconvolution: 'spatial_multi', 'dominant_type', 'diversity', 'stacked_bar', 'scatterpie', 'umap'
|
|
459
|
+
- For spatial_statistics: 'neighborhood', 'co_occurrence', 'ripley', 'moran', 'centrality', 'getis_ord'
|
|
460
|
+
output_dir: Directory to save the file (default: ./outputs)
|
|
461
|
+
filename: Custom filename (optional, auto-generated if not provided)
|
|
462
|
+
format: Image format (png, jpg, pdf, svg)
|
|
463
|
+
dpi: DPI for saved image (default: 300 for publication quality)
|
|
464
|
+
For publication quality, use 300+ DPI
|
|
465
|
+
|
|
466
|
+
Returns:
|
|
467
|
+
Path to the saved file
|
|
468
|
+
|
|
469
|
+
Examples:
|
|
470
|
+
Save a spatial plot: save_visualization("data1", "spatial")
|
|
471
|
+
Save with subtype: save_visualization("data1", "spatial_statistics", subtype="neighborhood")
|
|
472
|
+
Save deconvolution: save_visualization("data1", "deconvolution", subtype="scatterpie", format="pdf")
|
|
473
|
+
Save for publication: save_visualization("data1", "spatial", dpi=300, format="png")
|
|
474
|
+
"""
|
|
475
|
+
from .tools.visualization import save_visualization as save_func
|
|
476
|
+
|
|
477
|
+
# Create ToolContext for unified data access
|
|
478
|
+
ctx = ToolContext(
|
|
479
|
+
_data_manager=data_manager,
|
|
480
|
+
_mcp_context=context,
|
|
481
|
+
_visualization_registry=visualization_registry,
|
|
482
|
+
)
|
|
483
|
+
|
|
484
|
+
result = await save_func(
|
|
485
|
+
data_id=data_id,
|
|
486
|
+
ctx=ctx,
|
|
487
|
+
plot_type=plot_type,
|
|
488
|
+
subtype=subtype,
|
|
489
|
+
output_dir=output_dir,
|
|
490
|
+
filename=filename,
|
|
491
|
+
format=format,
|
|
492
|
+
dpi=dpi,
|
|
493
|
+
)
|
|
494
|
+
|
|
495
|
+
return result
|
|
496
|
+
|
|
497
|
+
|
|
498
|
+
@mcp.tool(annotations=get_tool_annotations("export_all_visualizations"))
|
|
499
|
+
@mcp_tool_error_handler()
|
|
500
|
+
async def export_all_visualizations(
|
|
501
|
+
data_id: str,
|
|
502
|
+
output_dir: str = "./exports",
|
|
503
|
+
format: str = "png",
|
|
504
|
+
dpi: Optional[int] = None,
|
|
505
|
+
context: Optional[Context] = None,
|
|
506
|
+
) -> list[str]:
|
|
507
|
+
"""Export all cached visualizations for a dataset to disk
|
|
508
|
+
|
|
509
|
+
This function regenerates each visualization from stored metadata and the original
|
|
510
|
+
data, then exports at the requested quality. This secure approach avoids
|
|
511
|
+
unsafe pickle deserialization.
|
|
512
|
+
|
|
513
|
+
Args:
|
|
514
|
+
data_id: Dataset ID to export visualizations for
|
|
515
|
+
output_dir: Directory to save files (default: ./exports)
|
|
516
|
+
format: Image format (png, jpg, jpeg, pdf, svg, eps, ps, tiff) (default: png)
|
|
517
|
+
dpi: DPI for raster formats (default: 300 for publication quality)
|
|
518
|
+
|
|
519
|
+
Returns:
|
|
520
|
+
List of paths to saved files
|
|
521
|
+
|
|
522
|
+
Examples:
|
|
523
|
+
# Export all visualizations as PNG
|
|
524
|
+
export_all_visualizations("data1")
|
|
525
|
+
|
|
526
|
+
# Export all as PDF for publication
|
|
527
|
+
export_all_visualizations("data1", format="pdf", dpi=300)
|
|
528
|
+
|
|
529
|
+
# Export to custom directory as SVG
|
|
530
|
+
export_all_visualizations("data1", "./my_exports", format="svg")
|
|
531
|
+
"""
|
|
532
|
+
from .tools.visualization import export_all_visualizations as export_func
|
|
533
|
+
|
|
534
|
+
# Create ToolContext for unified data access
|
|
535
|
+
ctx = ToolContext(
|
|
536
|
+
_data_manager=data_manager,
|
|
537
|
+
_mcp_context=context,
|
|
538
|
+
_visualization_registry=visualization_registry,
|
|
539
|
+
)
|
|
540
|
+
|
|
541
|
+
result = await export_func(
|
|
542
|
+
data_id=data_id,
|
|
543
|
+
ctx=ctx,
|
|
544
|
+
output_dir=output_dir,
|
|
545
|
+
format=format,
|
|
546
|
+
dpi=dpi,
|
|
547
|
+
)
|
|
548
|
+
|
|
549
|
+
return result
|
|
550
|
+
|
|
551
|
+
|
|
552
|
+
@mcp.tool(annotations=get_tool_annotations("clear_visualization_cache"))
|
|
553
|
+
@mcp_tool_error_handler()
|
|
554
|
+
async def clear_visualization_cache(
|
|
555
|
+
data_id: Optional[str] = None,
|
|
556
|
+
context: Optional[Context] = None,
|
|
557
|
+
) -> int:
|
|
558
|
+
"""Clear visualization cache to free memory
|
|
559
|
+
|
|
560
|
+
Args:
|
|
561
|
+
data_id: Optional dataset ID to clear specific visualizations (if None, clears all)
|
|
562
|
+
|
|
563
|
+
Returns:
|
|
564
|
+
Number of visualizations cleared
|
|
565
|
+
|
|
566
|
+
Examples:
|
|
567
|
+
Clear all visualizations: clear_visualization_cache()
|
|
568
|
+
Clear for specific dataset: clear_visualization_cache("data1")
|
|
569
|
+
"""
|
|
570
|
+
from .tools.visualization import clear_visualization_cache as clear_func
|
|
571
|
+
|
|
572
|
+
# Create ToolContext for unified data access
|
|
573
|
+
ctx = ToolContext(
|
|
574
|
+
_data_manager=data_manager,
|
|
575
|
+
_mcp_context=context,
|
|
576
|
+
_visualization_registry=visualization_registry,
|
|
577
|
+
)
|
|
578
|
+
|
|
579
|
+
result = await clear_func(ctx=ctx, data_id=data_id)
|
|
580
|
+
|
|
581
|
+
return result
|
|
582
|
+
|
|
583
|
+
|
|
584
|
+
@mcp.tool(annotations=get_tool_annotations("annotate_cell_types"))
|
|
585
|
+
@mcp_tool_error_handler()
|
|
586
|
+
async def annotate_cell_types(
|
|
587
|
+
data_id: str,
|
|
588
|
+
params: AnnotationParameters = AnnotationParameters(),
|
|
589
|
+
context: Optional[Context] = None,
|
|
590
|
+
) -> AnnotationResult:
|
|
591
|
+
"""Annotate cell types in spatial transcriptomics data
|
|
592
|
+
|
|
593
|
+
Args:
|
|
594
|
+
data_id: Dataset ID
|
|
595
|
+
params: Annotation parameters
|
|
596
|
+
|
|
597
|
+
Returns:
|
|
598
|
+
Annotation result with cell type information and optional visualization
|
|
599
|
+
|
|
600
|
+
Notes:
|
|
601
|
+
Annotation methods (status):
|
|
602
|
+
- tangram: Implemented (requires reference_data_id and PREPROCESSED reference data with HVGs)
|
|
603
|
+
- scanvi: Implemented (deep learning label transfer via scvi-tools, requires reference_data_id)
|
|
604
|
+
- cellassign: Implemented (via scvi-tools, requires marker_genes parameter)
|
|
605
|
+
- mllmcelltype: Implemented (multimodal LLM classifier)
|
|
606
|
+
- sctype: Implemented (requires R and rpy2)
|
|
607
|
+
- singler: Implemented (Python-based via singler/celldex packages, requires singler_reference parameter)
|
|
608
|
+
|
|
609
|
+
For methods requiring reference data (tangram, scanvi, singler):
|
|
610
|
+
- tangram/scanvi: reference_data_id must point to a loaded AND PREPROCESSED single-cell dataset
|
|
611
|
+
- IMPORTANT: Reference data MUST be preprocessed with preprocess_data() before use!
|
|
612
|
+
- cell_type_key: Leave as None for auto-detection. Only set if you know the exact column name in reference data
|
|
613
|
+
- Common cell type column names: 'cell_type', 'cell_types', 'celltype'
|
|
614
|
+
- singler: Can use either reference_data_id OR singler_reference (celldex built-in references)
|
|
615
|
+
|
|
616
|
+
Tangram-specific notes:
|
|
617
|
+
- Method: Deep learning-based spatial mapping of single-cell to spatial transcriptomics
|
|
618
|
+
- Requires: reference_data_id with PREPROCESSED single-cell data
|
|
619
|
+
- Mapping modes (mode parameter):
|
|
620
|
+
* mode="cells" (default): Maps individual cells to spatial locations
|
|
621
|
+
- Preserves single-cell heterogeneity and fine-grained resolution
|
|
622
|
+
- More computationally intensive (GPU recommended for large datasets)
|
|
623
|
+
- Best for: Same specimen data, when cell-level detail is critical
|
|
624
|
+
* mode="clusters" (recommended for cross-specimen): Aggregates cells by type before mapping
|
|
625
|
+
- Dramatically improves performance, runs on standard laptop
|
|
626
|
+
- Official recommendation: "Our choice when scRNAseq and spatial data come from different specimens"
|
|
627
|
+
- Requires: cluster_label parameter (e.g., "cell_type")
|
|
628
|
+
- Best for: Different specimens, limited resources, cell type distributions
|
|
629
|
+
- Trades single-cell resolution for stability and speed
|
|
630
|
+
- Confidence scores: Automatically normalized to [0, 1] probability range
|
|
631
|
+
- GPU acceleration: Set tangram_device='cuda:0' if GPU available
|
|
632
|
+
- Other parameters: tangram_density_prior, tangram_learning_rate, tangram_lambda_r
|
|
633
|
+
|
|
634
|
+
scANVI-specific notes:
|
|
635
|
+
- Method: Semi-supervised variational inference for label transfer
|
|
636
|
+
- Requires: Both datasets must have 'counts' layer (raw counts)
|
|
637
|
+
- Architecture: Configurable via scanvi_n_latent, scanvi_n_hidden, scanvi_dropout_rate
|
|
638
|
+
- Small datasets (<1000 genes/cells): Use scanvi_n_latent=3-5, scanvi_dropout_rate=0.2,
|
|
639
|
+
scanvi_use_scvi_pretrain=False, num_epochs=50 to prevent NaN errors
|
|
640
|
+
- Returns probabilistic cell type predictions with confidence scores
|
|
641
|
+
- GPU acceleration available (set tangram_device='cuda:0' if available)
|
|
642
|
+
|
|
643
|
+
SingleR-specific notes:
|
|
644
|
+
- Method: Reference-based correlation matching for cell type annotation
|
|
645
|
+
- Reference options:
|
|
646
|
+
* Built-in celldex references (via singler_reference parameter):
|
|
647
|
+
- Human: 'hpca' (recommended), 'blueprint_encode', 'dice', 'monaco_immune', 'novershtern_hematopoietic'
|
|
648
|
+
- Mouse: 'immgen' (recommended), 'mouse_rnaseq'
|
|
649
|
+
* Custom reference (via reference_data_id parameter)
|
|
650
|
+
- Common mistakes:
|
|
651
|
+
* 'HumanPrimaryCellAtlasData' - WRONG, use 'hpca'
|
|
652
|
+
* 'ImmGenData' - WRONG, use 'immgen'
|
|
653
|
+
- Returns correlation-based confidence scores for cell type assignments
|
|
654
|
+
- No GPU required (Python-based implementation via singler/celldex packages)
|
|
655
|
+
"""
|
|
656
|
+
# Validate dataset
|
|
657
|
+
validate_dataset(data_id)
|
|
658
|
+
|
|
659
|
+
# Validate reference data for methods that require it
|
|
660
|
+
if (
|
|
661
|
+
params.method in ["tangram", "scanvi", "singler"]
|
|
662
|
+
and params.reference_data_id
|
|
663
|
+
and not data_manager.dataset_exists(params.reference_data_id)
|
|
664
|
+
):
|
|
665
|
+
raise DataNotFoundError(
|
|
666
|
+
f"Reference dataset {params.reference_data_id} not found"
|
|
667
|
+
)
|
|
668
|
+
|
|
669
|
+
# Create ToolContext for clean data access (no redundant dict wrapping)
|
|
670
|
+
ctx = ToolContext(_data_manager=data_manager, _mcp_context=context)
|
|
671
|
+
|
|
672
|
+
# Lazy import annotation tool (avoids slow startup)
|
|
673
|
+
from .tools.annotation import annotate_cell_types
|
|
674
|
+
|
|
675
|
+
# Call annotation function with ToolContext
|
|
676
|
+
result = await annotate_cell_types(data_id, ctx, params)
|
|
677
|
+
|
|
678
|
+
# Note: No writeback needed - adata modifications are in-place on the same object
|
|
679
|
+
|
|
680
|
+
# Save annotation result
|
|
681
|
+
await data_manager.save_result(data_id, "annotation", result)
|
|
682
|
+
|
|
683
|
+
# Visualization should be done separately via visualization tools
|
|
684
|
+
|
|
685
|
+
return result
|
|
686
|
+
|
|
687
|
+
|
|
688
|
+
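A sketch of a Tangram annotation call following the notes above: both the spatial dataset and the single-cell reference must already be loaded and preprocessed. `method`, `reference_data_id`, `mode`, and `cluster_label` are names taken from the docstring; whether they are nested exactly like this inside AnnotationParameters is an assumption.

```python
from mcp import ClientSession


async def annotate_with_tangram(session: ClientSession, data_id: str, reference_id: str) -> None:
    # Reference data must have been run through preprocess_data first (see docstring).
    result = await session.call_tool(
        "annotate_cell_types",
        arguments={
            "data_id": data_id,
            "params": {
                "method": "tangram",
                "reference_data_id": reference_id,
                "mode": "clusters",            # recommended for cross-specimen mapping
                "cluster_label": "cell_type",  # required when mode="clusters"
            },
        },
    )
    print(result.content)
```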
@mcp.tool(annotations=get_tool_annotations("analyze_spatial_statistics"))
|
|
689
|
+
@mcp_tool_error_handler()
|
|
690
|
+
async def analyze_spatial_statistics(
|
|
691
|
+
data_id: str,
|
|
692
|
+
params: SpatialStatisticsParameters = SpatialStatisticsParameters(),
|
|
693
|
+
context: Optional[Context] = None,
|
|
694
|
+
) -> SpatialStatisticsResult:
|
|
695
|
+
"""Analyze spatial statistics and autocorrelation patterns
|
|
696
|
+
|
|
697
|
+
Args:
|
|
698
|
+
data_id: Dataset ID
|
|
699
|
+
params: Analysis parameters
|
|
700
|
+
|
|
701
|
+
Returns:
|
|
702
|
+
Spatial statistics analysis result with statistics and optional visualization
|
|
703
|
+
|
|
704
|
+
Notes:
|
|
705
|
+
Available analysis types (implemented):
|
|
706
|
+
- moran: Global Moran's I spatial autocorrelation (squidpy)
|
|
707
|
+
- local_moran: Local Moran's I (LISA) for spatial clustering detection
|
|
708
|
+
- geary: Geary's C spatial autocorrelation (squidpy)
|
|
709
|
+
- getis_ord: Getis-Ord Gi* hot/cold spot detection (esda/PySAL)
|
|
710
|
+
* Detects statistically significant spatial clusters of high/low values
|
|
711
|
+
* Parameters: getis_ord_alpha (significance level), getis_ord_correction (FDR/Bonferroni)
|
|
712
|
+
* Returns raw and corrected hotspot/coldspot counts
|
|
713
|
+
- neighborhood: Neighborhood enrichment (squidpy)
|
|
714
|
+
- co_occurrence: Co-occurrence analysis (squidpy)
|
|
715
|
+
- centrality: Graph centrality scores (squidpy)
|
|
716
|
+
- ripley: Ripley's K/L spatial point patterns
|
|
717
|
+
- bivariate_moran: Bivariate Moran's I for gene pair correlation
|
|
718
|
+
|
|
719
|
+
**Categorical Data Analysis (Choose based on number of categories):**
|
|
720
|
+
- join_count: Traditional Join Count for BINARY data (exactly 2 categories)
|
|
721
|
+
* Use for: Binary presence/absence, case/control, treated/untreated
|
|
722
|
+
* Returns: Global statistics (BB/WW/BW joins, p-value)
|
|
723
|
+
* Reference: Cliff & Ord (1981)
|
|
724
|
+
|
|
725
|
+
- local_join_count: Local Join Count for MULTI-CATEGORY data (>2 categories)
|
|
726
|
+
* Use for: Cell types, tissue domains, multi-class categorical variables
|
|
727
|
+
* Returns: Per-category local clustering statistics with p-values
|
|
728
|
+
* Identifies WHERE each category spatially clusters
|
|
729
|
+
* Reference: Anselin & Li (2019)
|
|
730
|
+
|
|
731
|
+
- network_properties: Spatial network analysis
|
|
732
|
+
- spatial_centrality: Spatial-specific centrality measures
|
|
733
|
+
"""
|
|
734
|
+
# Validate dataset
|
|
735
|
+
validate_dataset(data_id)
|
|
736
|
+
|
|
737
|
+
# Create ToolContext for clean data access (no redundant dict wrapping)
|
|
738
|
+
ctx = ToolContext(_data_manager=data_manager, _mcp_context=context)
|
|
739
|
+
|
|
740
|
+
# Lazy import spatial_statistics (squidpy is slow to import)
|
|
741
|
+
from .tools.spatial_statistics import (
|
|
742
|
+
analyze_spatial_statistics as _analyze_spatial_statistics,
|
|
743
|
+
)
|
|
744
|
+
|
|
745
|
+
# Call spatial statistics analysis function with ToolContext
|
|
746
|
+
result = await _analyze_spatial_statistics(data_id, ctx, params)
|
|
747
|
+
|
|
748
|
+
# Note: No writeback needed - adata modifications are in-place on the same object
|
|
749
|
+
|
|
750
|
+
# Save spatial statistics result
|
|
751
|
+
await data_manager.save_result(data_id, "spatial_statistics", result)
|
|
752
|
+
|
|
753
|
+
# Note: Visualization should be created separately using create_visualization tool
|
|
754
|
+
# This maintains clean separation between analysis and visualization
|
|
755
|
+
|
|
756
|
+
return result
|
|
757
|
+
|
|
758
|
+
|
|
759
|
+
@mcp.tool(annotations=get_tool_annotations("find_markers"))
|
|
760
|
+
@mcp_tool_error_handler()
|
|
761
|
+
async def find_markers(
|
|
762
|
+
data_id: str,
|
|
763
|
+
group_key: str,
|
|
764
|
+
group1: Optional[str] = None,
|
|
765
|
+
group2: Optional[str] = None,
|
|
766
|
+
method: str = "wilcoxon",
|
|
767
|
+
n_top_genes: int = 25, # Number of top differentially expressed genes to return
|
|
768
|
+
pseudocount: float = 1.0, # Pseudocount for log2 fold change calculation
|
|
769
|
+
min_cells: int = 3, # Minimum cells per group for statistical testing
|
|
770
|
+
sample_key: Optional[str] = None, # Sample key for pseudobulk (pydeseq2)
|
|
771
|
+
context: Optional[Context] = None,
|
|
772
|
+
) -> DifferentialExpressionResult:
|
|
773
|
+
"""Find differentially expressed genes between groups
|
|
774
|
+
|
|
775
|
+
Args:
|
|
776
|
+
data_id: Dataset ID
|
|
777
|
+
group_key: Column name defining groups
|
|
778
|
+
group1: First group (if None, compare against all others)
|
|
779
|
+
group2: Second group (if None, compare group1 against all others)
|
|
780
|
+
method: Statistical test method
|
|
781
|
+
n_top_genes: Number of top differentially expressed genes to return
|
|
782
|
+
pseudocount: Pseudocount added to expression values before log2 fold change
|
|
783
|
+
calculation to avoid log(0). Default: 1.0 (standard practice).
|
|
784
|
+
Lower values (0.1-0.5) increase sensitivity to low-expression genes.
|
|
785
|
+
Higher values (1-10) stabilize fold changes for sparse data.
|
|
786
|
+
min_cells: Minimum number of cells per group for statistical testing.
|
|
787
|
+
Default: 3 (minimum required for Wilcoxon test).
|
|
788
|
+
Increase to 10-30 for more robust statistical results.
|
|
789
|
+
Groups with fewer cells are automatically skipped with a warning.
|
|
790
|
+
sample_key: Column name in adata.obs for sample/replicate identifier.
|
|
791
|
+
REQUIRED for 'pydeseq2' method to perform pseudobulk aggregation.
|
|
792
|
+
Common values: 'sample', 'patient_id', 'batch', 'replicate'.
|
|
793
|
+
|
|
794
|
+
Returns:
|
|
795
|
+
Differential expression result with top marker genes
|
|
796
|
+
"""
|
|
797
|
+
# Validate dataset
|
|
798
|
+
validate_dataset(data_id)
|
|
799
|
+
|
|
800
|
+
# Create ToolContext for clean data access (no redundant dict wrapping)
|
|
801
|
+
ctx = ToolContext(_data_manager=data_manager, _mcp_context=context)
|
|
802
|
+
|
|
803
|
+
# Create params object for unified signature pattern
|
|
804
|
+
params = DifferentialExpressionParameters(
|
|
805
|
+
group_key=group_key,
|
|
806
|
+
group1=group1,
|
|
807
|
+
group2=group2,
|
|
808
|
+
method=method, # type: ignore[arg-type]
|
|
809
|
+
n_top_genes=n_top_genes,
|
|
810
|
+
pseudocount=pseudocount,
|
|
811
|
+
min_cells=min_cells,
|
|
812
|
+
sample_key=sample_key,
|
|
813
|
+
)
|
|
814
|
+
|
|
815
|
+
# Lazy import differential expression tool
|
|
816
|
+
from .tools.differential import differential_expression
|
|
817
|
+
|
|
818
|
+
# Call differential expression function with unified (data_id, ctx, params) signature
|
|
819
|
+
result = await differential_expression(data_id, ctx, params)
|
|
820
|
+
|
|
821
|
+
# Note: No writeback needed - adata modifications are in-place on the same object
|
|
822
|
+
|
|
823
|
+
# Save differential expression result
|
|
824
|
+
await data_manager.save_result(data_id, "differential_expression", result)
|
|
825
|
+
|
|
826
|
+
return result
|
|
827
|
+
|
|
828
|
+
|
|
829
|
+
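find_markers likewise takes flat arguments; the pseudobulk path described above needs method="pydeseq2" plus a sample_key. A sketch with placeholder group labels, assuming an initialized ClientSession:

```python
from mcp import ClientSession


async def markers_pseudobulk(session: ClientSession, data_id: str) -> None:
    # Pseudobulk DE with pyDESeq2 requires a sample/replicate column (sample_key).
    result = await session.call_tool(
        "find_markers",
        arguments={
            "data_id": data_id,
            "group_key": "cell_type",
            "group1": "Tumor",          # placeholder group labels
            "group2": "Normal",
            "method": "pydeseq2",
            "sample_key": "patient_id",
            "n_top_genes": 25,
        },
    )
    print(result.content)
```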
@mcp.tool(annotations=get_tool_annotations("compare_conditions"))
|
|
830
|
+
@mcp_tool_error_handler()
|
|
831
|
+
async def compare_conditions(
|
|
832
|
+
data_id: str,
|
|
833
|
+
condition_key: str,
|
|
834
|
+
condition1: str,
|
|
835
|
+
condition2: str,
|
|
836
|
+
sample_key: str,
|
|
837
|
+
cell_type_key: Optional[str] = None,
|
|
838
|
+
method: str = "pseudobulk",
|
|
839
|
+
n_top_genes: int = 50,
|
|
840
|
+
min_cells_per_sample: int = 10,
|
|
841
|
+
min_samples_per_condition: int = 2,
|
|
842
|
+
padj_threshold: float = 0.05,
|
|
843
|
+
log2fc_threshold: float = 0.0,
|
|
844
|
+
context: Optional[Context] = None,
|
|
845
|
+
) -> ConditionComparisonResult:
|
|
846
|
+
"""Compare experimental conditions across multiple biological samples.
|
|
847
|
+
|
|
848
|
+
This tool performs pseudobulk differential expression analysis to compare
|
|
849
|
+
conditions (e.g., Treatment vs Control) across biological replicates.
|
|
850
|
+
It properly accounts for sample-level variation using DESeq2.
|
|
851
|
+
|
|
852
|
+
Args:
|
|
853
|
+
data_id: Dataset ID
|
|
854
|
+
condition_key: Column name in adata.obs containing experimental conditions
|
|
855
|
+
(e.g., 'treatment', 'disease_status', 'timepoint')
|
|
856
|
+
condition1: First condition for comparison (typically experimental group)
|
|
857
|
+
condition2: Second condition for comparison (typically control group)
|
|
858
|
+
sample_key: Column name in adata.obs identifying biological replicates
|
|
859
|
+
(e.g., 'patient_id', 'sample', 'replicate')
|
|
860
|
+
cell_type_key: Optional column for cell type stratification. If provided,
|
|
861
|
+
analysis is performed separately for each cell type.
|
|
862
|
+
method: Analysis method (currently only 'pseudobulk' is supported)
|
|
863
|
+
n_top_genes: Number of top genes to return per comparison
|
|
864
|
+
min_cells_per_sample: Minimum cells required per sample to be included
|
|
865
|
+
min_samples_per_condition: Minimum samples required per condition
|
|
866
|
+
padj_threshold: Adjusted p-value threshold for significance
|
|
867
|
+
log2fc_threshold: Log2 fold change threshold for significance
|
|
868
|
+
|
|
869
|
+
Returns:
|
|
870
|
+
ConditionComparisonResult with differential expression results
|
|
871
|
+
|
|
872
|
+
Example:
|
|
873
|
+
# Global comparison
|
|
874
|
+
compare_conditions(
|
|
875
|
+
data_id="data1",
|
|
876
|
+
condition_key="treatment",
|
|
877
|
+
condition1="Drug",
|
|
878
|
+
condition2="Control",
|
|
879
|
+
sample_key="patient_id"
|
|
880
|
+
)
|
|
881
|
+
|
|
882
|
+
# Cell type stratified
|
|
883
|
+
compare_conditions(
|
|
884
|
+
data_id="data1",
|
|
885
|
+
condition_key="treatment",
|
|
886
|
+
condition1="Drug",
|
|
887
|
+
condition2="Control",
|
|
888
|
+
sample_key="patient_id",
|
|
889
|
+
cell_type_key="cell_type"
|
|
890
|
+
)
|
|
891
|
+
"""
|
|
892
|
+
# Validate dataset
|
|
893
|
+
validate_dataset(data_id)
|
|
894
|
+
|
|
895
|
+
# Create ToolContext
|
|
896
|
+
ctx = ToolContext(_data_manager=data_manager, _mcp_context=context)
|
|
897
|
+
|
|
898
|
+
# Create params object
|
|
899
|
+
params = ConditionComparisonParameters(
|
|
900
|
+
condition_key=condition_key,
|
|
901
|
+
condition1=condition1,
|
|
902
|
+
condition2=condition2,
|
|
903
|
+
sample_key=sample_key,
|
|
904
|
+
cell_type_key=cell_type_key,
|
|
905
|
+
method=method, # type: ignore[arg-type]
|
|
906
|
+
n_top_genes=n_top_genes,
|
|
907
|
+
min_cells_per_sample=min_cells_per_sample,
|
|
908
|
+
min_samples_per_condition=min_samples_per_condition,
|
|
909
|
+
padj_threshold=padj_threshold,
|
|
910
|
+
log2fc_threshold=log2fc_threshold,
|
|
911
|
+
)
|
|
912
|
+
|
|
913
|
+
# Lazy import
|
|
914
|
+
from .tools.condition_comparison import compare_conditions as _compare_conditions
|
|
915
|
+
|
|
916
|
+
# Run analysis
|
|
917
|
+
result = await _compare_conditions(data_id, ctx, params)
|
|
918
|
+
|
|
919
|
+
# Save result
|
|
920
|
+
await data_manager.save_result(data_id, "condition_comparison", result)
|
|
921
|
+
|
|
922
|
+
return result
|
|
923
|
+
|
|
924
|
+
|
|
925
|
+
@mcp.tool(annotations=get_tool_annotations("analyze_cnv"))
|
|
926
|
+
@mcp_tool_error_handler()
|
|
927
|
+
async def analyze_cnv(
|
|
928
|
+
data_id: str,
|
|
929
|
+
reference_key: str,
|
|
930
|
+
reference_categories: list[str],
|
|
931
|
+
method: str = "infercnvpy",
|
|
932
|
+
window_size: int = 100,
|
|
933
|
+
step: int = 10,
|
|
934
|
+
exclude_chromosomes: Optional[list[str]] = None,
|
|
935
|
+
dynamic_threshold: Optional[float] = 1.5,
|
|
936
|
+
cluster_cells: bool = False,
|
|
937
|
+
dendrogram: bool = False,
|
|
938
|
+
numbat_genome: str = "hg38",
|
|
939
|
+
numbat_allele_data_key: str = "allele_counts",
|
|
940
|
+
numbat_t: float = 0.15,
|
|
941
|
+
numbat_max_entropy: float = 0.8,
|
|
942
|
+
numbat_min_cells: int = 10,
|
|
943
|
+
numbat_ncores: int = 1,
|
|
944
|
+
numbat_skip_nj: bool = False,
|
|
945
|
+
context: Optional[Context] = None,
|
|
946
|
+
) -> CNVResult:
|
|
947
|
+
"""Analyze copy number variations (CNVs) in spatial transcriptomics data
|
|
948
|
+
|
|
949
|
+
Supports two CNV analysis methods:
|
|
950
|
+
- infercnvpy: Expression-based CNV inference (default, fast)
|
|
951
|
+
- Numbat: Haplotype-aware CNV analysis (requires allele data, more accurate)
|
|
952
|
+
|
|
953
|
+
Args:
|
|
954
|
+
data_id: Dataset identifier
|
|
955
|
+
reference_key: Column name in adata.obs for cell type labels
|
|
956
|
+
reference_categories: List of cell types to use as reference (normal cells)
|
|
957
|
+
method: CNV analysis method ("infercnvpy" or "numbat", default: "infercnvpy")
|
|
958
|
+
window_size: Number of genes for CNV averaging window (default: 100)
|
|
959
|
+
step: Step size for sliding window (default: 10)
|
|
960
|
+
exclude_chromosomes: Chromosomes to exclude (e.g., ['chrX', 'chrY'])
|
|
961
|
+
dynamic_threshold: Threshold for dynamic CNV calling (default: 1.5)
|
|
962
|
+
cluster_cells: Whether to cluster cells by CNV pattern
|
|
963
|
+
dendrogram: Whether to compute hierarchical clustering dendrogram
|
|
964
|
+
context: MCP context
|
|
965
|
+
|
|
966
|
+
Returns:
|
|
967
|
+
CNV analysis result with statistics and visualization availability
|
|
968
|
+
|
|
969
|
+
Notes:
|
|
970
|
+
CNV analysis methods:
|
|
971
|
+
- infercnvpy: Expression-based (implemented, no allele data required)
|
|
972
|
+
- numbat: Haplotype-aware (implemented when rpy2 installed, requires allele data)
|
|
973
|
+
|
|
974
|
+
Numbat-specific notes:
|
|
975
|
+
- Method: Haplotype-aware CNV analysis with phylogeny reconstruction
|
|
976
|
+
- Requires: Allele-specific counts in adata.layers or adata.obsm
|
|
977
|
+
- Allele data preparation: Use cellSNP-lite, pileup_and_phase, or similar tools
|
|
978
|
+
- Genome options: hg38, hg19, mm10, mm39
|
|
979
|
+
- Returns: CNV matrix, clone assignments, phylogeny tree
|
|
980
|
+
- GPU acceleration: Not applicable (R-based method)
|
|
981
|
+
|
|
982
|
+
Examples:
|
|
983
|
+
# Basic infercnvpy analysis
|
|
984
|
+
analyze_cnv("data1", "cell_type", ["T cells", "B cells"])
|
|
985
|
+
|
|
986
|
+
# Numbat analysis (requires allele data)
|
|
987
|
+
analyze_cnv("data1", "cell_type", ["T cells", "B cells"],
|
|
988
|
+
method="numbat", numbat_genome="hg38")
|
|
989
|
+
|
|
990
|
+
# With clustering
|
|
991
|
+
analyze_cnv("data1", "leiden", ["0", "1"], cluster_cells=True)
|
|
992
|
+
"""
|
|
993
|
+
# Validate dataset
|
|
994
|
+
validate_dataset(data_id)
|
|
995
|
+
|
|
996
|
+
# Create ToolContext for clean data access (no redundant dict wrapping)
|
|
997
|
+
ctx = ToolContext(_data_manager=data_manager, _mcp_context=context)
|
|
998
|
+
|
|
999
|
+
# Create CNVParameters object
|
|
1000
|
+
# Type: ignore needed for Literal parameters validated at runtime by Pydantic
|
|
1001
|
+
params = CNVParameters(
|
|
1002
|
+
method=method, # type: ignore[arg-type]
|
|
1003
|
+
reference_key=reference_key,
|
|
1004
|
+
reference_categories=reference_categories,
|
|
1005
|
+
window_size=window_size,
|
|
1006
|
+
step=step,
|
|
1007
|
+
exclude_chromosomes=exclude_chromosomes,
|
|
1008
|
+
dynamic_threshold=dynamic_threshold,
|
|
1009
|
+
cluster_cells=cluster_cells,
|
|
1010
|
+
dendrogram=dendrogram,
|
|
1011
|
+
numbat_genome=numbat_genome, # type: ignore[arg-type]
|
|
1012
|
+
numbat_allele_data_key=numbat_allele_data_key,
|
|
1013
|
+
numbat_t=numbat_t,
|
|
1014
|
+
numbat_max_entropy=numbat_max_entropy,
|
|
1015
|
+
numbat_min_cells=numbat_min_cells,
|
|
1016
|
+
numbat_ncores=numbat_ncores,
|
|
1017
|
+
numbat_skip_nj=numbat_skip_nj,
|
|
1018
|
+
)
|
|
1019
|
+
|
|
1020
|
+
# Lazy import CNV analysis tool
|
|
1021
|
+
from .tools.cnv_analysis import infer_cnv
|
|
1022
|
+
|
|
1023
|
+
# Call CNV inference function with ToolContext
|
|
1024
|
+
result = await infer_cnv(data_id=data_id, ctx=ctx, params=params)
|
|
1025
|
+
|
|
1026
|
+
# Note: No writeback needed - adata modifications are in-place on the same object
|
|
1027
|
+
|
|
1028
|
+
# Save CNV result
|
|
1029
|
+
await data_manager.save_result(data_id, "cnv_analysis", result)
|
|
1030
|
+
|
|
1031
|
+
return result
|
|
1032
|
+
|
|
1033
|
+
|
|
1034
|
+
@mcp.tool(annotations=get_tool_annotations("analyze_velocity_data"))
|
|
1035
|
+
@mcp_tool_error_handler()
|
|
1036
|
+
async def analyze_velocity_data(
|
|
1037
|
+
data_id: str,
|
|
1038
|
+
params: RNAVelocityParameters = RNAVelocityParameters(),
|
|
1039
|
+
context: Optional[Context] = None,
|
|
1040
|
+
) -> RNAVelocityResult:
|
|
1041
|
+
"""Analyze RNA velocity to understand cellular dynamics
|
|
1042
|
+
|
|
1043
|
+
Args:
|
|
1044
|
+
data_id: Dataset ID
|
|
1045
|
+
params: RNA velocity parameters
|
|
1046
|
+
|
|
1047
|
+
Returns:
|
|
1048
|
+
RNA velocity analysis result
|
|
1049
|
+
|
|
1050
|
+
Notes:
|
|
1051
|
+
Velocity methods (status):
|
|
1052
|
+
- scvelo: scVelo with three modes (implemented, tested)
|
|
1053
|
+
- deterministic: Deterministic rate model
|
|
1054
|
+
- stochastic: Stochastic rate model (default)
|
|
1055
|
+
- dynamical: Dynamical model with ODE fitting
|
|
1056
|
+
- velovi: VeloVI deep learning method (implemented, requires scvi-tools, tested)
|
|
1057
|
+
"""
|
|
1058
|
+
# Validate dataset
|
|
1059
|
+
validate_dataset(data_id)
|
|
1060
|
+
|
|
1061
|
+
# Create ToolContext for clean data access (no redundant dict wrapping)
|
|
1062
|
+
ctx = ToolContext(_data_manager=data_manager, _mcp_context=context)
|
|
1063
|
+
|
|
1064
|
+
# Lazy import velocity analysis tool
|
|
1065
|
+
from .tools.velocity import analyze_rna_velocity
|
|
1066
|
+
|
|
1067
|
+
# Call RNA velocity function with ToolContext
|
|
1068
|
+
result = await analyze_rna_velocity(data_id, ctx, params)
|
|
1069
|
+
|
|
1070
|
+
# Note: No writeback needed - adata modifications are in-place on the same object
|
|
1071
|
+
|
|
1072
|
+
# Save velocity result
|
|
1073
|
+
await data_manager.save_result(data_id, "rna_velocity", result)
|
|
1074
|
+
|
|
1075
|
+
# Visualization should be done separately via visualization tools
|
|
1076
|
+
|
|
1077
|
+
return result
|
|
1078
|
+
|
|
1079
|
+
|
|
1080
|
+
@mcp.tool(annotations=get_tool_annotations("analyze_trajectory_data"))
@mcp_tool_error_handler()
async def analyze_trajectory_data(
    data_id: str,
    params: TrajectoryParameters = TrajectoryParameters(),
    context: Optional[Context] = None,
) -> TrajectoryResult:
    """Infer cellular trajectories and pseudotime

    Args:
        data_id: Dataset ID
        params: Trajectory analysis parameters

    Returns:
        Trajectory analysis result

    Notes:
        Trajectory methods (status):
        - dpt: Diffusion pseudotime (implemented)
        - palantir: Probabilistic trajectory inference (implemented when palantir installed)
        - cellrank: RNA velocity-based trajectory inference (implemented when cellrank installed)
        - velovi: scvi-tools VeloVI (implemented when scvi-tools available)
    """
    # Validate dataset
    validate_dataset(data_id)

    # Create ToolContext
    ctx = ToolContext(_data_manager=data_manager, _mcp_context=context)

    # Lazy import trajectory function
    from .tools.trajectory import analyze_trajectory

    # Call trajectory function
    result = await analyze_trajectory(data_id, ctx, params)

    # Note: No writeback needed - adata modifications are in-place on the same object

    # Save trajectory result
    await data_manager.save_result(data_id, "trajectory", result)

    # Visualization should be done separately via visualization tools

    return result


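# Illustrative sketch (not part of chatspatial): the scanpy calls behind a plain
# diffusion-pseudotime ("dpt") run as listed in the docstring above. The root-cell
# choice here is a placeholder; tools/trajectory.py derives it from the supplied
# TrajectoryParameters.
def _example_dpt_pseudotime(adata, root_cell_index=0):
    import scanpy as sc

    sc.pp.pca(adata, n_comps=30)
    sc.pp.neighbors(adata, n_neighbors=15, use_rep="X_pca")
    sc.tl.diffmap(adata)
    adata.uns["iroot"] = root_cell_index  # index of the root cell for pseudotime
    sc.tl.dpt(adata)
    return adata.obs["dpt_pseudotime"]

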
@mcp.tool(annotations=get_tool_annotations("integrate_samples"))
@mcp_tool_error_handler()
async def integrate_samples(
    data_ids: list[str],
    params: IntegrationParameters = IntegrationParameters(),
    context: Optional[Context] = None,
) -> IntegrationResult:
    """Integrate multiple spatial transcriptomics samples

    Args:
        data_ids: List of dataset IDs to integrate
        params: Integration parameters

    Returns:
        Integration result with integrated dataset ID

    Notes:
        Integration methods (status):
        - harmony, bbknn, scanorama: Classical methods (implemented)
        - scvi: Deep learning method (implemented, requires scvi-tools)

        Removed methods:
        - multivi: Requires MuData format (not compatible with current workflow)
        - contrastivevi: Not integrated (designed for Perturb-seq use cases)
    """
    # Validate all datasets first
    for data_id in data_ids:
        validate_dataset(data_id)

    # Create ToolContext for clean data access
    ctx = ToolContext(_data_manager=data_manager, _mcp_context=context)

    # Lazy import to avoid slow startup
    from .tools.integration import integrate_samples as integrate_func

    # Call integration function with ToolContext
    # Note: integrate_func uses ctx.add_dataset() to store the integrated dataset
    result = await integrate_func(data_ids, ctx, params)

    # Save integration result
    integrated_id = result.data_id
    await data_manager.save_result(integrated_id, "integration", result)

    return result


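# Illustrative sketch (not part of chatspatial): a minimal Harmony-style integration
# of several AnnData objects, i.e. one of the "classical methods" listed above, using
# scanpy's external Harmony wrapper. tools/integration.py wraps the same idea behind
# IntegrationParameters; batch label names here are assumptions.
def _example_harmony_integration(adatas, batch_labels):
    import anndata as ad
    import scanpy as sc

    # Concatenate samples and record which sample each cell came from
    combined = ad.concat(adatas, label="batch", keys=batch_labels)
    sc.pp.normalize_total(combined, target_sum=1e4)
    sc.pp.log1p(combined)
    sc.pp.pca(combined, n_comps=30)

    # Harmony corrects the PCA embedding for the batch covariate
    sc.external.pp.harmony_integrate(combined, key="batch")
    sc.pp.neighbors(combined, use_rep="X_pca_harmony")
    return combined

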
@mcp.tool(annotations=get_tool_annotations("deconvolve_data"))
@mcp_tool_error_handler()
async def deconvolve_data(
    data_id: str,
    params: DeconvolutionParameters,  # No default - LLM must provide parameters
    context: Optional[Context] = None,
) -> DeconvolutionResult:
    """Deconvolve spatial spots to estimate cell type proportions

    Args:
        data_id: Dataset ID
        params: Deconvolution parameters including:
            - method: Deconvolution method to use
            - cell_type_key: Key in reference data for cell types (REQUIRED)
            - reference_data_id: Reference single-cell dataset ID (required for most methods)

        Cell2location-specific parameters (official scvi-tools recommendations):
        Phase 1 (Critical fixes):
            - ref_model_epochs: Reference model training epochs (default: 250)
            - n_epochs: Cell2location model training epochs (default: 30000)
            - n_cells_per_spot: Expected cells per location (default: 30, tissue-dependent)
            - detection_alpha: RNA detection sensitivity (NEW DEFAULT 2024: 20, old: 200)
            - batch_key: Batch column for batch effect correction (default: None)
            - categorical_covariate_keys: Technical covariates list (default: None)
            - apply_gene_filtering: Apply official gene filtering (default: True)
            - gene_filter_*: Gene filtering thresholds (cell_count_cutoff=5, etc.)

        Phase 2 (Training enhancements):
            - ref_model_lr: Reference model learning rate (default: 0.002)
            - cell2location_lr: Cell2location learning rate (default: 0.005)
            - ref_model_train_size: Training data fraction for ref model (default: 1.0)
            - cell2location_train_size: Training data fraction for cell2location (default: 1.0)
            - enable_qc_plots: Generate QC diagnostic plots (default: False)
            - qc_output_dir: Output directory for QC plots (default: None)

        Phase 3 (Runtime optimization):
            - early_stopping: Enable early stopping to reduce training time (default: True)
            - early_stopping_patience: Epochs to wait before stopping (default: 45)
            - early_stopping_threshold: Minimum relative change threshold (default: 0.0)
            - use_aggressive_training: Use train_aggressive() for better convergence (default: True)
            - validation_size: Validation set fraction for early stopping (default: 0.1)

    Returns:
        Deconvolution result with cell type proportions

    Notes:
        Deconvolution methods (status):
        - cell2location, destvi, stereoscope, tangram: Implemented when scvi-tools available
        - rctd: Implemented via rpy2/R when R packages are installed (spacexr)
          * Supports 3 modes: 'doublet' (high-res), 'full' (low-res, default), 'multi' (greedy)
          * Mode selection via rctd_mode parameter
          * Reference: Cable et al. (2022) Nat. Biotechnol.
        - spotlight: Implemented via rpy2/R when R packages are installed
        - card: Implemented via rpy2/R when CARD package is installed
          * Unique feature: Models spatial correlation of cell type compositions via CAR model
          * Optional imputation: Create enhanced high-resolution spatial maps
          * Parameters: card_imputation, card_NumGrids, card_ineibor, card_minCountGene, card_minCountSpot
          * Reference: Ma & Zhou (2022) Nat. Biotechnol.

        RCTD-specific notes:
        - Method: Robust decomposition of cell type mixtures using platform-free approach
        - Mode selection guide:
          * 'doublet': For high-resolution data (Slide-seq ~10μm, MERFISH, Visium HD)
            - Assigns 1-2 cell types per spot, identifies singlets vs doublets
          * 'full' (default): For low-resolution data (standard Visium 55μm spots)
            - Can assign any number of cell types, best for multi-cellular spots
          * 'multi': Greedy algorithm alternative to 'full'
            - More constrained than 'full', useful for intermediate resolutions
        - Additional parameters: rctd_confidence_threshold, rctd_doublet_threshold, max_cores

        CARD-specific notes:
        - Method: Spatially informed cell type deconvolution with CAR (Conditional AutoRegressive) model
        - Unique capability: Models spatial correlation of cell type compositions across tissue locations
        - Imputation feature (optional via card_imputation=True):
          * Creates enhanced spatial maps with arbitrarily higher resolution than original measurement
          * Imputes cell type compositions and gene expression at unmeasured locations
          * Extremely fast: 0.4s for all genes (5816x faster than BayesSpace)
          * Use cases: Enhance Visium to near-cellular resolution, fill tissue gaps, smooth artifacts
        - Imputation parameters:
          * card_NumGrids: Number of grid points (2000=standard, 5000=high-res, 10000=ultra)
          * card_ineibor: Neighbors for smoothing (10=default, higher=smoother)
        - Quality control: card_minCountGene, card_minCountSpot
        - Multi-sample support: card_sample_key for batch effects
        - Visualization: Use plot_type='card_imputation' to visualize imputed results

        Cell2location uses two-stage training:
        1. Reference model (NB regression): Learns cell type signatures (250 epochs)
        2. Cell2location model: Maps cell types to spatial locations (30000 epochs)
    """
    # Validate dataset
    validate_dataset(data_id)

    # Validate reference data if provided
    if params.reference_data_id and not data_manager.dataset_exists(
        params.reference_data_id
    ):
        raise DataNotFoundError(
            f"Reference dataset {params.reference_data_id} not found"
        )

    # Create ToolContext for clean data access (no redundant dict wrapping)
    ctx = ToolContext(_data_manager=data_manager, _mcp_context=context)

    # Lazy import deconvolution tool
    from .tools.deconvolution import deconvolve_spatial_data

    # Call deconvolution function with ToolContext
    result = await deconvolve_spatial_data(data_id, ctx, params)

    # Note: No writeback needed - adata modifications are in-place on the same object

    # Save deconvolution result
    await data_manager.save_result(data_id, "deconvolution", result)

    # Visualization should be done separately via visualization tools

    return result


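# Illustrative sketch (not part of chatspatial): the two-stage cell2location workflow
# the docstring above describes, written directly against the cell2location package.
# Epoch counts and detection_alpha mirror the documented defaults (250 / 30000 / 20);
# tools/deconvolution/cell2location.py adds gene filtering, early stopping and QC on
# top of this, so treat this only as an outline of the idea.
def _example_cell2location_two_stage(adata_ref, adata_vis, cell_type_key="cell_type"):
    from cell2location.models import Cell2location, RegressionModel

    # Stage 1: negative-binomial regression learns per-cell-type expression signatures
    RegressionModel.setup_anndata(adata_ref, labels_key=cell_type_key)
    ref_model = RegressionModel(adata_ref)
    ref_model.train(max_epochs=250)
    adata_ref = ref_model.export_posterior(adata_ref)
    signatures = adata_ref.varm["means_per_cluster_mu_fg"]

    # Stage 2: map the learned signatures onto spatial locations
    Cell2location.setup_anndata(adata_vis)
    c2l_model = Cell2location(
        adata_vis,
        cell_state_df=signatures,
        N_cells_per_location=30,  # "n_cells_per_spot" above
        detection_alpha=20,       # 2024 default noted above
    )
    c2l_model.train(max_epochs=30000)
    adata_vis = c2l_model.export_posterior(adata_vis)
    return adata_vis

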
@mcp.tool(annotations=get_tool_annotations("identify_spatial_domains"))
@mcp_tool_error_handler()
async def identify_spatial_domains(
    data_id: str,
    params: SpatialDomainParameters = SpatialDomainParameters(),
    context: Optional[Context] = None,
) -> SpatialDomainResult:
    """Identify spatial domains and tissue architecture

    Args:
        data_id: Dataset ID
        params: Spatial domain parameters

    Returns:
        Spatial domain result with identified domains

    Notes:
        Spatial domain methods (status):
        - spagcn: SpaGCN graph convolutional network (implemented; optional dependency SpaGCN)
        - leiden / louvain: clustering-based (implemented; no extra deps)
        - stagate: STAGATE (implemented; optional dependency STAGATE)
        - graphst: GraphST graph self-supervised contrastive learning (implemented; optional dependency GraphST)
        - stlearn / sedr / bayesspace: not implemented in this server; planned/experimental
    """
    # Validate dataset first
    validate_dataset(data_id)

    # Create ToolContext for clean data access
    ctx = ToolContext(_data_manager=data_manager, _mcp_context=context)

    # Lazy import to avoid slow startup
    from .tools.spatial_domains import identify_spatial_domains as identify_domains_func

    # Call spatial domains function with ToolContext
    result = await identify_domains_func(data_id, ctx, params)

    # Note: No writeback needed - adata modifications are in-place on the same object

    # Save spatial domains result
    await data_manager.save_result(data_id, "spatial_domains", result)

    return result


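# Illustrative sketch (not part of chatspatial): the clustering-based ("leiden") option
# listed above, reduced to plain scanpy calls. It clusters on the expression kNN graph
# only; SpaGCN/STAGATE/GraphST differ precisely in that they also use the spatial graph.
# The resolution value and result key are assumptions.
def _example_leiden_domains(adata, resolution=0.5):
    import scanpy as sc

    sc.pp.pca(adata, n_comps=30)
    sc.pp.neighbors(adata)
    sc.tl.leiden(adata, resolution=resolution, key_added="spatial_domain")
    return adata.obs["spatial_domain"]

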
@mcp.tool(annotations=get_tool_annotations("analyze_cell_communication"))
@mcp_tool_error_handler()
async def analyze_cell_communication(
    data_id: str,
    params: CellCommunicationParameters,  # No default - LLM must provide parameters
    context: Optional[Context] = None,
) -> CellCommunicationResult:
    """Analyze cell-cell communication patterns

    Args:
        data_id: Dataset ID
        params: Cell communication parameters

    Returns:
        Cell communication analysis result

    Notes:
        Cell communication methods (status):
        - liana: Implemented (global/cluster and spatial bivariate modes; requires liana)
        - cellphonedb: Implemented (statistical analysis with spatial microenvironments; requires cellphonedb)
        - cellchat_r: Implemented (native R CellChat with full features; requires rpy2 and CellChat R package)
        - nichenet / connectome / cytotalk / squidpy: Not implemented in this server

    IMPORTANT: For comprehensive cell communication analysis:

        **Species-specific configuration:**
        - species="mouse" + liana_resource="mouseconsensus" for mouse data
        - species="human" + liana_resource="consensus" for human data
        - species="zebrafish" for zebrafish data

        **Available LIANA resources (liana_resource parameter):**
        - "consensus" (default, recommended): Consensus of multiple databases
        - "mouseconsensus": Mouse-specific consensus database
        - "cellphonedb": CellPhoneDB database (curated, stringent)
        - "celltalkdb": CellTalkDB database (large, comprehensive)
        - "icellnet": iCellNet database (immune cell focus)
        - "cellchatdb": CellChat database
        - "connectomedb2020": Connectome database 2020
        - "baccin2019", "cellcall", "cellinker", "embrace", "guide2pharma",
          "hpmr", "italk", "kirouac2010", "lrdb", "ramilowski2015": Additional resources

        **Common failure scenarios and solutions:**
        1. "Too few features from resource found in data":
           - adata.raw is automatically used when available for comprehensive gene coverage
           - Ensure species matches data (mouse vs human)
           - Use species-appropriate resource (mouseconsensus for mouse)

        2. Missing spatial connectivity:
           - Run spatial neighbor computation in preprocessing step (see below)

        3. Missing cell type annotations:
           - Ensure cell_type_key column exists or run annotation first

        **Spatial connectivity computation (preprocessing step):**

        The spatial neighborhood definition profoundly impacts cell communication analysis results.
        Choose parameters based on your spatial transcriptomics platform and biological question:

        **Platform-specific recommendations:**

        10x Visium (hexagonal grid, 55µm spots, 100µm center-to-center spacing):
          • coord_type: "grid" (for hexagonal layout) or "generic" (for custom)
          • n_neighs: 6 (direct neighbors in hexagonal grid)
          • n_rings: 1-2 (for grid mode: 1=first ring only, 2=first+second ring)
          • radius: 150-200 pixels (for distance-based, ~captures first neighbor ring)
          ├─ Local interactions (paracrine signaling): n_neighs=6 or n_rings=1
          ├─ Microenvironment analysis: n_neighs=12-18 or n_rings=2
          └─ Broader spatial context: radius=300-500 pixels

        Slide-seq/Slide-seqV2 (10µm beads, high density):
          • coord_type: "generic"
          • n_neighs: 10-30 (higher density requires more neighbors)
          • radius: 50-100 µm (typical cell-cell signaling range)
          ├─ Dense regions: n_neighs=20-30
          ├─ Sparse regions: n_neighs=10-15
          └─ Distance-based: radius=50-100 µm (matches biological signaling range)

        MERFISH/seqFISH+ (single-cell resolution, <1µm precision):
          • coord_type: "generic"
          • n_neighs: 3-10 (nearest cell neighbors)
          • radius: 20-50 µm (direct cell-cell contact to short-range paracrine)
          ├─ Direct contact: n_neighs=3-5 or radius=10-20 µm
          ├─ Paracrine signaling: n_neighs=5-10 or radius=30-50 µm
          └─ Microenvironment: radius=50-100 µm

        **Biological considerations:**

        Cell communication distance ranges (from literature):
          • Juxtacrine signaling: 0-10 µm (direct contact)
          • Paracrine signaling: 10-100 µm (e.g., Wnt/Wg: ~50-100 µm)
          • Broader microenvironment: 100-500 µm

        Analysis goal-based selection:
          • Identify direct cell-cell interactions → Use smaller neighborhoods (n_neighs=6-10, radius=50-100 µm)
          • Study tissue microenvironments → Use larger neighborhoods (n_neighs=15-30, radius=200-500 µm)
          • Rare cell type interactions → Use adaptive/larger k to avoid missing signals
          • Abundant cell types → Use smaller k to avoid spurious connections

        **Parameter tradeoffs:**
          • Larger neighborhoods: Capture long-range signals but lose spatial specificity
          • Smaller neighborhoods: High spatial precision but may miss important interactions
          • Fixed k (n_neighs): Same number for all spots, may overcluster dense regions
          • Distance-based (radius): More biologically meaningful but varying neighbor counts

        **Examples:**

        Visium - local paracrine signaling:
            # Step 1: Compute spatial neighbors (preprocessing)
            import squidpy as sq
            sq.gr.spatial_neighbors(adata, coord_type='grid', n_rings=1)

            # Step 2: Analyze communication
            params = {
                "species": "human",
                "liana_resource": "consensus"
            }

        Visium - microenvironment analysis:
            # Step 1: Compute spatial neighbors (preprocessing)
            import squidpy as sq
            sq.gr.spatial_neighbors(adata, coord_type='generic', n_neighs=18)

            # Step 2: Analyze communication
            params = {
                "species": "human"
            }

        MERFISH - direct cell-cell contact:
            # Step 1: Compute spatial neighbors (preprocessing)
            import squidpy as sq
            sq.gr.spatial_neighbors(adata, coord_type='generic', radius=20)

            # Step 2: Analyze communication
            params = {
                "species": "mouse",
                "liana_resource": "mouseconsensus"
            }

        **References:**
        • Squidpy framework: Palla et al., Nat Methods 2022
        • LIANA+: Dimitrov et al., Nat Cell Biol 2024
        • Visium resolution: 10x Genomics Technical Note
        • Signaling ranges: Literature-based (Wnt/Wg: ~50-100 µm)
    """
    # Validate dataset first
    validate_dataset(data_id)

    # Create ToolContext for clean data access
    ctx = ToolContext(_data_manager=data_manager, _mcp_context=context)

    # Lazy import to avoid slow startup
    from .tools.cell_communication import (
        analyze_cell_communication as analyze_comm_func,
    )

    # Call cell communication function with ToolContext
    result = await analyze_comm_func(data_id, ctx, params)

    # Note: No writeback needed - adata modifications are in-place on the same object

    # Save communication result
    await data_manager.save_result(data_id, "cell_communication", result)

    # Visualization should be done separately via visualization tools

    return result


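# Illustrative sketch (not part of chatspatial): a direct liana-py call corresponding
# to the "liana" global/cluster mode described above. The groupby column and resource
# name are assumptions; chatspatial takes them from CellCommunicationParameters
# (cell_type_key, liana_resource) inside tools/cell_communication.py.
def _example_liana_rank_aggregate(adata, cell_type_key="cell_type", resource="consensus"):
    import liana as li

    # Aggregates several ligand-receptor scoring methods into one ranking
    li.mt.rank_aggregate(
        adata,
        groupby=cell_type_key,
        resource_name=resource,  # e.g. "mouseconsensus" for mouse data
        expr_prop=0.1,           # minimum fraction of expressing cells per group
        verbose=True,
    )
    return adata.uns["liana_res"]

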
@mcp.tool(annotations=get_tool_annotations("analyze_enrichment"))
@mcp_tool_error_handler()
async def analyze_enrichment(
    data_id: str,
    params: Optional[EnrichmentParameters] = None,
    context: Optional[Context] = None,
) -> EnrichmentResult:
    """Perform gene set enrichment analysis

    Args:
        data_id: Dataset ID
        params: Enrichment analysis parameters (REQUIRED: species must be specified)

    Returns:
        Enrichment analysis result

    IMPORTANT - Species and Database Selection:
        You MUST specify 'species' parameter explicitly. No default species is assumed.

        Recommended database combinations by species:

        FOR MOUSE DATA (species="mouse"):
        - "KEGG_Pathways" (recommended, uses KEGG_2019_Mouse internally)
        - "Reactome_Pathways" (comprehensive pathway database)
        - "MSigDB_Hallmark" (curated hallmark gene sets)
        - "GO_Biological_Process" (works but may have fewer matches)

        FOR HUMAN DATA (species="human"):
        - "KEGG_Pathways" (recommended, uses KEGG_2021_Human internally)
        - "Reactome_Pathways" (comprehensive pathway database)
        - "MSigDB_Hallmark" (curated hallmark gene sets)
        - "GO_Biological_Process" (standard GO terms)

        Available gene_set_database options:
        - "GO_Biological_Process" (default, auto-adapts to species)
        - "GO_Molecular_Function" (GO molecular function terms)
        - "GO_Cellular_Component" (GO cellular component terms)
        - "KEGG_Pathways" (species-specific: KEGG_2021_Human or KEGG_2019_Mouse)
        - "Reactome_Pathways" (Reactome_2022 pathway database)
        - "MSigDB_Hallmark" (MSigDB_Hallmark_2020 curated gene sets)
        - "Cell_Type_Markers" (cell type marker genes)
        - Custom gene sets via gene_sets parameter

        Methods available:
        - "pathway_ora": Over-representation analysis (recommended)
        - "pathway_enrichr": Enrichr web service
        - "pathway_gsea": Gene Set Enrichment Analysis
        - "pathway_ssgsea": Single-sample GSEA
        - "spatial_enrichmap": Spatial enrichment mapping

        Complete results are preserved in adata.uns for downstream visualization and analysis.

        Example usage:
        For mouse data: params={"species": "mouse", "gene_set_database": "KEGG_Pathways"}
        For human data: params={"species": "human", "gene_set_database": "KEGG_Pathways"}
    """
    from .tools.enrichment import analyze_enrichment as analyze_enrichment_func

    # Validate dataset
    validate_dataset(data_id)

    # Create ToolContext
    ctx = ToolContext(_data_manager=data_manager, _mcp_context=context)

    # Call enrichment analysis (all business logic is in tools/enrichment.py)
    result = await analyze_enrichment_func(data_id, ctx, params)

    # Save result
    await data_manager.save_result(data_id, "enrichment", result)

    return result


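# Illustrative sketch (not part of chatspatial): an Enrichr-style over-representation
# query via gseapy, matching the species/database pairing described above
# (KEGG_2021_Human vs KEGG_2019_Mouse). tools/enrichment.py performs the mapping from
# "KEGG_Pathways" to these library names internally; the helper name is hypothetical.
def _example_enrichr_query(gene_list, species="human"):
    import gseapy as gp

    library = "KEGG_2021_Human" if species == "human" else "KEGG_2019_Mouse"
    enr = gp.enrichr(
        gene_list=gene_list,   # e.g. top marker genes for a cluster or spatial domain
        gene_sets=[library],
        organism=species,
        outdir=None,           # keep results in memory only
    )
    return enr.results         # DataFrame of enriched terms with adjusted p-values

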
@mcp.tool(annotations=get_tool_annotations("find_spatial_genes"))
@mcp_tool_error_handler()
async def find_spatial_genes(
    data_id: str,
    params: SpatialVariableGenesParameters = SpatialVariableGenesParameters(),
    context: Optional[Context] = None,
) -> SpatialVariableGenesResult:
    """Identify spatially variable genes using various methods

    Args:
        data_id: Dataset ID
        params: Spatial variable gene parameters

    Returns:
        Spatial variable genes result

    Notes:
        Available methods:
        - sparkx: SPARK-X non-parametric method (default, best accuracy)
        - spatialde: SpatialDE Gaussian process-based method (statistically rigorous)

        Method selection via params.method parameter.
        Each method has specific parameters - see SpatialVariableGenesParameters model.

        Performance comparison (3000 spots × 20000 genes):
        - SPARK-X: ~2-5 min (best accuracy)
        - SpatialDE: ~15-30 min (best statistical rigor)
    """
    # Validate dataset
    validate_dataset(data_id)

    # Create ToolContext for clean data access (no redundant dict wrapping)
    ctx = ToolContext(_data_manager=data_manager, _mcp_context=context)

    # Lazy import spatial genes tool
    from .tools.spatial_genes import identify_spatial_genes

    # Call spatial genes function with ToolContext
    result = await identify_spatial_genes(data_id, ctx, params)

    # Note: No writeback needed - adata modifications are in-place on the same object

    # Save spatial genes result
    await data_manager.save_result(data_id, "spatial_genes", result)

    # Visualization should be done separately via visualization tools

    return result


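# Illustrative sketch (not part of chatspatial): the core SpatialDE call behind the
# "spatialde" option above. It assumes adata.obsm["spatial"] holds coordinates and that
# expression is already normalised; tools/spatial_genes.py handles normalisation and
# result post-processing, so treat this as a rough outline only.
def _example_spatialde(adata):
    import pandas as pd
    import SpatialDE

    coords = pd.DataFrame(
        adata.obsm["spatial"], columns=["x", "y"], index=adata.obs_names
    )
    expr = pd.DataFrame(
        adata.X.toarray() if hasattr(adata.X, "toarray") else adata.X,
        index=adata.obs_names,
        columns=adata.var_names,
    )
    results = SpatialDE.run(coords, expr)  # per-gene spatial variability statistics
    return results.sort_values("qval")

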
@mcp.tool(annotations=get_tool_annotations("register_spatial_data"))
@mcp_tool_error_handler()
async def register_spatial_data(
    source_id: str,
    target_id: str,
    method: str = "paste",
    landmarks: Optional[list[dict[str, Any]]] = None,
    context: Optional[Context] = None,
) -> dict[str, Any]:
    """Register/align spatial transcriptomics data across sections

    Args:
        source_id: Source dataset ID
        target_id: Target dataset ID to align to
        method: Registration method (paste, stalign)
        landmarks: Optional landmark annotations for landmark-based registration
            (not forwarded by the current implementation)

    Returns:
        Registration result with transformation matrix
    """
    # Validate datasets first
    validate_dataset(source_id)
    validate_dataset(target_id)

    # Create ToolContext for unified data access
    ctx = ToolContext(_data_manager=data_manager, _mcp_context=context)

    # Lazy import to avoid slow startup
    from .tools.spatial_registration import register_spatial_slices_mcp

    # Call registration function using ToolContext
    # Note: registration modifies adata in-place, changes reflected via reference
    result = await register_spatial_slices_mcp(source_id, target_id, ctx, method)

    # Save registration result
    await data_manager.save_result(source_id, "registration", result)

    return result


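# Illustrative sketch (not part of chatspatial): pairwise alignment of two sections
# with the PASTE package, the default "paste" method above. The import name and the
# pairwise_align / stack_slices_pairwise calls are assumed from the paste-bio package;
# tools/spatial_registration.py wraps this and stores the resulting transformation.
def _example_paste_pairwise_align(source_adata, target_adata):
    import paste as pst

    # Optimal-transport alignment matrix between spots of the two sections
    pi = pst.pairwise_align(source_adata, target_adata, alpha=0.1)

    # Project both sections into a shared coordinate system
    aligned_slices = pst.stack_slices_pairwise([source_adata, target_adata], [pi])
    return pi, aligned_slices

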
# ============== Publication Export Tools ==============


@mcp.tool(annotations=get_tool_annotations("save_data"))
@mcp_tool_error_handler()
async def save_data(
    data_id: str,
    output_path: Optional[str] = None,
    context: Optional[Context] = None,
) -> str:
    """Manually save dataset to disk

    Saves the current state of the dataset including all analysis results
    and metadata to a compressed H5AD file.

    Args:
        data_id: Dataset ID to save
        output_path: Optional custom save path. If not provided, saves to:
            - CHATSPATIAL_DATA_DIR environment variable location, or
            - .chatspatial_saved/ directory next to original data

    Returns:
        Path where data was saved

    Examples:
        # Save to default location
        save_data("data1")

        # Save to custom location
        save_data("data1", output_path="/path/to/save/my_analysis.h5ad")

    Note:
        Saved files include all preprocessing, analysis results, and metadata.
        Use CHATSPATIAL_DATA_DIR environment variable for centralized storage.
    """
    from .utils.persistence import save_adata

    # Validate dataset exists
    validate_dataset(data_id)

    if context:
        await context.info(f"Saving dataset '{data_id}'...")

    # Get dataset info
    dataset_info = await data_manager.get_dataset(data_id)
    adata = dataset_info["adata"]
    original_path = dataset_info.get("path", "")

    try:
        if output_path:
            # User specified custom path
            from pathlib import Path

            # Resolve to absolute path to avoid confusion about save location
            save_path = Path(output_path).resolve()
            save_path.parent.mkdir(parents=True, exist_ok=True)
            adata.write_h5ad(save_path, compression="gzip", compression_opts=4)
        else:
            # Use default location
            save_path = save_adata(data_id, adata, original_path)

        # Always return absolute path so user knows exact location
        absolute_path = save_path.resolve()

        if context:
            await context.info(f"Dataset saved to: {absolute_path}")

        return f"Dataset '{data_id}' saved to: {absolute_path}"

    except Exception as e:
        error_msg = f"Failed to save dataset: {e}"
        if context:
            await context.error(error_msg)
        raise


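# Illustrative sketch (not part of chatspatial): files written by save_data are plain
# compressed H5AD, so they can be reloaded outside the server with anndata alone, with
# all stored analysis results still available in .obs/.obsm/.uns.
def _example_reload_saved_dataset(path):
    import anndata as ad

    return ad.read_h5ad(path)

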
def main():
    """Run the MCP server"""
    import argparse

    parser = argparse.ArgumentParser(description="ChatSpatial MCP Server")
    parser.add_argument(
        "--transport",
        choices=["stdio", "sse"],
        default="stdio",
        help="Transport protocol to use (default: stdio)",
    )

    args = parser.parse_args()

    print(
        f"Starting ChatSpatial server with {args.transport} transport...",
        file=sys.stderr,
    )
    mcp.run(transport=args.transport)


if __name__ == "__main__":
    main()