chatspatial 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chatspatial/__init__.py +11 -0
- chatspatial/__main__.py +141 -0
- chatspatial/cli/__init__.py +7 -0
- chatspatial/config.py +53 -0
- chatspatial/models/__init__.py +85 -0
- chatspatial/models/analysis.py +513 -0
- chatspatial/models/data.py +2462 -0
- chatspatial/server.py +1763 -0
- chatspatial/spatial_mcp_adapter.py +720 -0
- chatspatial/tools/__init__.py +3 -0
- chatspatial/tools/annotation.py +1903 -0
- chatspatial/tools/cell_communication.py +1603 -0
- chatspatial/tools/cnv_analysis.py +605 -0
- chatspatial/tools/condition_comparison.py +595 -0
- chatspatial/tools/deconvolution/__init__.py +402 -0
- chatspatial/tools/deconvolution/base.py +318 -0
- chatspatial/tools/deconvolution/card.py +244 -0
- chatspatial/tools/deconvolution/cell2location.py +326 -0
- chatspatial/tools/deconvolution/destvi.py +144 -0
- chatspatial/tools/deconvolution/flashdeconv.py +101 -0
- chatspatial/tools/deconvolution/rctd.py +317 -0
- chatspatial/tools/deconvolution/spotlight.py +216 -0
- chatspatial/tools/deconvolution/stereoscope.py +109 -0
- chatspatial/tools/deconvolution/tangram.py +135 -0
- chatspatial/tools/differential.py +625 -0
- chatspatial/tools/embeddings.py +298 -0
- chatspatial/tools/enrichment.py +1863 -0
- chatspatial/tools/integration.py +807 -0
- chatspatial/tools/preprocessing.py +723 -0
- chatspatial/tools/spatial_domains.py +808 -0
- chatspatial/tools/spatial_genes.py +836 -0
- chatspatial/tools/spatial_registration.py +441 -0
- chatspatial/tools/spatial_statistics.py +1476 -0
- chatspatial/tools/trajectory.py +495 -0
- chatspatial/tools/velocity.py +405 -0
- chatspatial/tools/visualization/__init__.py +155 -0
- chatspatial/tools/visualization/basic.py +393 -0
- chatspatial/tools/visualization/cell_comm.py +699 -0
- chatspatial/tools/visualization/cnv.py +320 -0
- chatspatial/tools/visualization/core.py +684 -0
- chatspatial/tools/visualization/deconvolution.py +852 -0
- chatspatial/tools/visualization/enrichment.py +660 -0
- chatspatial/tools/visualization/integration.py +205 -0
- chatspatial/tools/visualization/main.py +164 -0
- chatspatial/tools/visualization/multi_gene.py +739 -0
- chatspatial/tools/visualization/persistence.py +335 -0
- chatspatial/tools/visualization/spatial_stats.py +469 -0
- chatspatial/tools/visualization/trajectory.py +639 -0
- chatspatial/tools/visualization/velocity.py +411 -0
- chatspatial/utils/__init__.py +115 -0
- chatspatial/utils/adata_utils.py +1372 -0
- chatspatial/utils/compute.py +327 -0
- chatspatial/utils/data_loader.py +499 -0
- chatspatial/utils/dependency_manager.py +462 -0
- chatspatial/utils/device_utils.py +165 -0
- chatspatial/utils/exceptions.py +185 -0
- chatspatial/utils/image_utils.py +267 -0
- chatspatial/utils/mcp_utils.py +137 -0
- chatspatial/utils/path_utils.py +243 -0
- chatspatial/utils/persistence.py +78 -0
- chatspatial/utils/scipy_compat.py +143 -0
- chatspatial-1.1.0.dist-info/METADATA +242 -0
- chatspatial-1.1.0.dist-info/RECORD +67 -0
- chatspatial-1.1.0.dist-info/WHEEL +5 -0
- chatspatial-1.1.0.dist-info/entry_points.txt +2 -0
- chatspatial-1.1.0.dist-info/licenses/LICENSE +21 -0
- chatspatial-1.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,720 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Spatial MCP Adapter for ChatSpatial
|
|
3
|
+
|
|
4
|
+
This module provides a clean abstraction layer between MCP protocol requirements
|
|
5
|
+
and ChatSpatial's spatial analysis functionality.
|
|
6
|
+
|
|
7
|
+
Design Principles:
|
|
8
|
+
- Single source of truth: One registry for visualization state
|
|
9
|
+
- Store params, not bytes: Images can be regenerated on demand
|
|
10
|
+
- LRU eviction: Automatic memory management
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import logging
|
|
14
|
+
import os
|
|
15
|
+
import time
|
|
16
|
+
from collections import OrderedDict
|
|
17
|
+
from dataclasses import dataclass, field
|
|
18
|
+
from typing import Any, Optional
|
|
19
|
+
|
|
20
|
+
from mcp.server.fastmcp import Context, FastMCP
|
|
21
|
+
from mcp.types import ToolAnnotations
|
|
22
|
+
|
|
23
|
+
# Import MCP improvements
|
|
24
|
+
from .models.data import VisualizationParameters
|
|
25
|
+
from .utils.exceptions import DataNotFoundError, ParameterError
|
|
26
|
+
|
|
27
|
+
logger = logging.getLogger(__name__)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# =============================================================================
|
|
31
|
+
# TOOL ANNOTATIONS - Single Source of Truth
|
|
32
|
+
# =============================================================================
|
|
33
|
+
# These annotations are passed to FastMCP's @mcp.tool() decorator to inform
|
|
34
|
+
# LLM clients about tool behavior characteristics.
|
|
35
|
+
#
|
|
36
|
+
# Annotation meanings (from MCP spec):
|
|
37
|
+
# - readOnlyHint: Tool only reads data, doesn't modify state
|
|
38
|
+
# - idempotentHint: Repeated calls with same args have no additional effect
|
|
39
|
+
# - openWorldHint: Tool may interact with external entities (network, files)
|
|
40
|
+
# =============================================================================
|
|
41
|
+
|
|
42
|
+
# Maps tool name -> MCP behavior hints; consumed via get_tool_annotations()
# when registering tools with FastMCP. Tools absent from this dict get
# conservative defaults (see get_tool_annotations).
TOOL_ANNOTATIONS: dict[str, ToolAnnotations] = {
    # Data I/O tools
    "load_data": ToolAnnotations(
        readOnlyHint=True,  # Reads from filesystem, doesn't modify data
        idempotentHint=True,  # Loading same file yields same result
        openWorldHint=True,  # Accesses filesystem
    ),
    "save_data": ToolAnnotations(
        readOnlyHint=False,  # Writes to filesystem
        idempotentHint=True,  # Saving same data to same path is idempotent
        openWorldHint=True,  # Accesses filesystem
    ),
    # Preprocessing - modifies data in-place
    "preprocess_data": ToolAnnotations(
        readOnlyHint=False,  # Modifies adata in-place
        idempotentHint=False,  # Re-running changes state
    ),
    # Visualization - read-only analysis
    "visualize_data": ToolAnnotations(
        readOnlyHint=True,  # Only reads data to generate plots
        idempotentHint=True,  # Same params yield same plot
    ),
    "save_visualization": ToolAnnotations(
        readOnlyHint=False,  # Writes to filesystem
        idempotentHint=True,  # Saving same viz is idempotent
        openWorldHint=True,  # Accesses filesystem
    ),
    "export_all_visualizations": ToolAnnotations(
        readOnlyHint=False,  # Writes image files to filesystem
        idempotentHint=True,  # Re-exporting same cache yields same files
        openWorldHint=True,  # Accesses filesystem
    ),
    "clear_visualization_cache": ToolAnnotations(
        readOnlyHint=False,  # Clears cache
        idempotentHint=True,  # Clearing empty cache is idempotent
    ),
    # Analysis tools - modify adata by adding results
    "annotate_cell_types": ToolAnnotations(
        readOnlyHint=False,  # Adds cell type annotations to adata.obs
        idempotentHint=False,  # Re-running may yield different results
        openWorldHint=True,  # May use external references
    ),
    "analyze_spatial_statistics": ToolAnnotations(
        readOnlyHint=False,  # Adds statistics to adata.uns
        idempotentHint=True,  # Same params yield same statistics
    ),
    "find_markers": ToolAnnotations(
        readOnlyHint=True,  # Computes markers without modifying adata
        idempotentHint=True,  # Deterministic computation
    ),
    "analyze_velocity_data": ToolAnnotations(
        readOnlyHint=False,  # Adds velocity to adata
        idempotentHint=False,  # Stochastic methods
    ),
    "analyze_trajectory_data": ToolAnnotations(
        readOnlyHint=False,  # Adds trajectory info to adata
        idempotentHint=False,  # May have stochastic elements
    ),
    "integrate_samples": ToolAnnotations(
        readOnlyHint=False,  # Creates new integrated dataset
        idempotentHint=False,  # Creates new dataset each time
    ),
    "deconvolve_data": ToolAnnotations(
        readOnlyHint=False,  # Adds deconvolution results to adata
        idempotentHint=False,  # Deep learning methods are stochastic
        openWorldHint=True,  # May use external references
    ),
    "identify_spatial_domains": ToolAnnotations(
        readOnlyHint=False,  # Adds domain labels to adata.obs
        idempotentHint=False,  # Clustering can vary
    ),
    "analyze_cell_communication": ToolAnnotations(
        readOnlyHint=False,  # Adds communication results to adata.uns
        idempotentHint=True,  # Deterministic given same inputs
        openWorldHint=True,  # Uses LR databases
    ),
    "analyze_enrichment": ToolAnnotations(
        readOnlyHint=False,  # Adds enrichment scores to adata
        idempotentHint=True,  # Deterministic
    ),
    "find_spatial_genes": ToolAnnotations(
        readOnlyHint=False,  # Adds spatial gene info to adata.var
        idempotentHint=True,  # Deterministic methods
    ),
    "analyze_cnv": ToolAnnotations(
        readOnlyHint=False,  # Adds CNV results to adata
        idempotentHint=True,  # Deterministic
    ),
    "register_spatial_data": ToolAnnotations(
        readOnlyHint=False,  # Modifies spatial coordinates
        idempotentHint=False,  # Registration can vary
    ),
}
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def get_tool_annotations(tool_name: str) -> ToolAnnotations:
    """Get annotations for a tool by name.

    Args:
        tool_name: Name of the tool (e.g., 'load_data', 'preprocess_data')

    Returns:
        ToolAnnotations object for the tool. Returns conservative defaults
        if tool is not in registry.

    Usage:
        @mcp.tool(annotations=get_tool_annotations("load_data"))
        async def load_data(...): ...
    """
    try:
        return TOOL_ANNOTATIONS[tool_name]
    except KeyError:
        # Unknown tool: assume it mutates state and is not idempotent,
        # which is the safest thing to tell an LLM client.
        return ToolAnnotations(readOnlyHint=False, idempotentHint=False)
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
# =============================================================================
|
|
159
|
+
# VISUALIZATION REGISTRY - Single Source of Truth
|
|
160
|
+
# =============================================================================
|
|
161
|
+
# First Principles Design:
|
|
162
|
+
# - Store params, not bytes: Images can be regenerated on demand from params
|
|
163
|
+
# - LRU eviction: Automatic memory management prevents unbounded growth
|
|
164
|
+
# - Unified cache key: {data_id}_{plot_type}[_{subtype}]
|
|
165
|
+
# =============================================================================
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
@dataclass
class VisualizationEntry:
    """A single visualization entry in the registry.

    Stores the parameters needed to regenerate a visualization, not the image bytes.
    This follows the first principle: data + params = reproducible output.
    """

    # Parameters sufficient to regenerate the plot on demand.
    params: VisualizationParameters
    # Path to saved PNG (if large image); owned by the registry, which
    # deletes it on eviction/clear.
    file_path: Optional[str] = None
    # Creation time (seconds since epoch); default captured at construction.
    timestamp: float = field(default_factory=time.time)
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
class VisualizationRegistry:
    """Single source of truth for visualization state.

    Design Principles:
    - Store params, not bytes: Images regenerated on demand
    - LRU eviction: Oldest entries removed when max_entries exceeded
    - Unified interface: One place for all visualization state
    """

    def __init__(self, max_entries: int = 100):
        """Initialize the registry with LRU capacity.

        Args:
            max_entries: Maximum number of entries before LRU eviction
        """
        # OrderedDict gives O(1) move_to_end/popitem for LRU bookkeeping.
        self._entries: OrderedDict[str, VisualizationEntry] = OrderedDict()
        self._max_entries = max_entries

    @staticmethod
    def _delete_entry_file(entry: VisualizationEntry) -> None:
        """Best-effort removal of an entry's saved image file.

        OSError is deliberately swallowed: failing to delete a cached PNG
        must never break registry bookkeeping. (Previously this logic was
        duplicated in store(), clear(None), and clear(prefix).)
        """
        if entry.file_path and os.path.exists(entry.file_path):
            try:
                os.remove(entry.file_path)
            except OSError:
                pass

    def store(
        self,
        key: str,
        params: VisualizationParameters,
        file_path: Optional[str] = None,
    ) -> None:
        """Store a visualization entry.

        Args:
            key: Cache key format: {data_id}_{plot_type}[_{subtype}]
            params: Visualization parameters for regeneration
            file_path: Optional path to saved image file
        """
        # Move to end if exists (LRU behavior)
        if key in self._entries:
            self._entries.move_to_end(key)

        self._entries[key] = VisualizationEntry(
            params=params,
            file_path=file_path,
            timestamp=time.time(),
        )

        # LRU eviction: drop the oldest entries (and their files) past capacity.
        while len(self._entries) > self._max_entries:
            _oldest_key, oldest_entry = self._entries.popitem(last=False)
            self._delete_entry_file(oldest_entry)

    def get(self, key: str) -> Optional[VisualizationEntry]:
        """Get a visualization entry.

        Args:
            key: Cache key to look up

        Returns:
            VisualizationEntry if found, None otherwise
        """
        if key in self._entries:
            self._entries.move_to_end(key)  # LRU touch
            return self._entries[key]
        return None

    def exists(self, key: str) -> bool:
        """Check if a visualization exists in registry."""
        return key in self._entries

    def list_for_dataset(self, data_id: str) -> list[str]:
        """List all visualization keys for a dataset.

        Args:
            data_id: Dataset identifier

        Returns:
            List of cache keys matching the dataset
        """
        prefix = f"{data_id}_"
        return [k for k in self._entries.keys() if k.startswith(prefix)]

    def clear(self, prefix: Optional[str] = None) -> int:
        """Clear visualizations from registry.

        Args:
            prefix: Optional prefix to filter which keys to clear.
                If None, clears all visualizations.

        Returns:
            Number of entries cleared
        """
        if prefix is None:
            count = len(self._entries)
            # Clean up all saved image files before dropping the entries.
            for entry in self._entries.values():
                self._delete_entry_file(entry)
            self._entries.clear()
            return count

        keys_to_remove = [k for k in self._entries if k.startswith(prefix)]
        for key in keys_to_remove:
            self._delete_entry_file(self._entries.pop(key))
        return len(keys_to_remove)

    def keys(self) -> list[str]:
        """Return all keys in the registry."""
        return list(self._entries.keys())
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
class SpatialMCPAdapter:
    """Main adapter class that bridges MCP and spatial analysis functionality.

    Simplified design: Only manages data and visualization registry.
    Removed dead code (ResourceManager was never registered to MCP server).
    """

    def __init__(self, mcp_server: FastMCP, data_manager: "DefaultSpatialDataManager"):
        # Holds references only; tool registration happens elsewhere
        # (see create_spatial_mcp_server / server module).
        self.mcp = mcp_server
        self.data_manager = data_manager
        # Bounded LRU cache of visualization params; 100 entries before eviction.
        self.visualization_registry = VisualizationRegistry(max_entries=100)
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
class DefaultSpatialDataManager:
    """Default implementation of spatial data management"""

    def __init__(self):
        # Maps data_id -> dataset info dict (holds "adata" plus metadata).
        self.data_store: dict[str, Any] = {}
        # Monotonic counter used to mint "data_N" identifiers.
        self._next_id = 1

    async def load_dataset(
        self, path: str, data_type: str, name: Optional[str] = None
    ) -> str:
        """Load a spatial dataset and return its ID"""
        from .utils.data_loader import load_spatial_data

        info = await load_spatial_data(path, data_type, name)

        # Mint a fresh identifier and register the dataset under it.
        data_id = f"data_{self._next_id}"
        self._next_id += 1
        self.data_store[data_id] = info
        return data_id

    async def get_dataset(self, data_id: str) -> Any:
        """Get a dataset by ID"""
        if data_id in self.data_store:
            return self.data_store[data_id]
        raise DataNotFoundError(f"Dataset {data_id} not found")

    async def list_datasets(self) -> list[dict[str, Any]]:
        """List all loaded datasets"""
        summaries = []
        for data_id, info in self.data_store.items():
            summaries.append(
                {
                    "id": data_id,
                    "name": info.get("name", f"Dataset {data_id}"),
                    "type": info.get("type", "unknown"),
                    "n_cells": info.get("n_cells", 0),
                    "n_genes": info.get("n_genes", 0),
                }
            )
        return summaries

    async def save_result(self, data_id: str, result_type: str, result: Any) -> None:
        """Save analysis results"""
        if data_id not in self.data_store:
            raise DataNotFoundError(f"Dataset {data_id} not found")
        # Lazily create the per-dataset results bucket on first save.
        self.data_store[data_id].setdefault("results", {})[result_type] = result

    async def get_result(self, data_id: str, result_type: str) -> Any:
        """Get analysis results"""
        if data_id not in self.data_store:
            raise DataNotFoundError(f"Dataset {data_id} not found")

        results = self.data_store[data_id].get("results", {})
        if result_type in results:
            return results[result_type]
        raise DataNotFoundError(
            f"No {result_type} results found for dataset {data_id}"
        )

    def dataset_exists(self, data_id: str) -> bool:
        """Check if a dataset exists.

        Args:
            data_id: Dataset identifier

        Returns:
            True if the dataset exists, False otherwise
        """
        return data_id in self.data_store

    async def update_adata(self, data_id: str, adata: Any) -> None:
        """Update the adata object for an existing dataset.

        Use this when preprocessing creates a new adata object (e.g., copy,
        subsample, or format conversion).

        Args:
            data_id: Dataset identifier
            adata: New AnnData object to store

        Raises:
            DataNotFoundError: If dataset not found
        """
        if data_id not in self.data_store:
            raise DataNotFoundError(f"Dataset {data_id} not found")
        self.data_store[data_id]["adata"] = adata

    async def create_dataset(
        self,
        data_id: str,
        adata: Any,
        name: Optional[str] = None,
        metadata: Optional[dict[str, Any]] = None,
    ) -> None:
        """Create a new dataset with specified ID.

        Use this when creating derived datasets (e.g., integration results,
        subset data).

        Args:
            data_id: Unique identifier for the new dataset
            adata: AnnData object to store
            name: Optional display name for the dataset
            metadata: Optional additional metadata dict

        Raises:
            ParameterError: If dataset with same ID already exists
        """
        if data_id in self.data_store:
            raise ParameterError(
                f"Dataset {data_id} already exists. Use update_adata() to update."
            )
        record: dict[str, Any] = {"adata": adata}
        if name:
            record["name"] = name
        if metadata:
            record.update(metadata)
        self.data_store[data_id] = record
|
|
438
|
+
|
|
439
|
+
|
|
440
|
+
@dataclass
class ToolContext:
    """Unified context for ChatSpatial tool execution.

    This class provides a clean interface for tools to access data and logging
    without the redundant data_store dict wrapping pattern.

    Design Rationale:
    - Python dict assignment is reference, not copy. The old pattern of wrapping
      dataset_info in a temp dict and "writing back" was completely unnecessary.
    - Tools should access adata directly via get_adata(), not through dict wrapping.
    - Logging methods fall back gracefully when MCP context is unavailable.

    Logging Strategy:
    - User-visible messages: await ctx.info(), await ctx.warning(), await ctx.error()
      These appear in Claude's conversation and provide user-friendly progress updates.
      When no MCP context is attached, they fall back to the Python logger so
      messages are never silently lost.
    - Developer debugging: ctx.debug()
      This writes to Python logger for debugging, not visible to users.

    Usage:
        async def my_tool(data_id: str, ctx: ToolContext, params: Params) -> Result:
            adata = await ctx.get_adata(data_id)
            await ctx.info(f"Processing {adata.n_obs} cells")  # User sees this
            ctx.debug(f"Internal state: {some_detail}")  # Developer log only
            # ... analysis logic ...
            return result
    """

    _data_manager: "DefaultSpatialDataManager"
    _mcp_context: Optional[Context] = None
    _visualization_registry: Optional["VisualizationRegistry"] = None
    _logger: Optional[logging.Logger] = field(default=None, repr=False)

    def __post_init__(self) -> None:
        """Initialize the logger for debug messages."""
        if self._logger is None:
            self._logger = logging.getLogger("chatspatial.tools")

    def debug(self, msg: str) -> None:
        """Log debug message for developers (not visible to users).

        Use this for detailed technical information that helps with debugging
        but would be noise for end users. These messages go to Python logger.

        Args:
            msg: Debug message to log
        """
        if self._logger:
            self._logger.debug(msg)

    def log_config(self, title: str, config: dict[str, Any]) -> None:
        """Log configuration details for developers.

        Convenience method for logging parameter configurations in a
        structured format. Goes to Python logger, not user-visible.

        Args:
            title: Configuration section title
            config: Dictionary of configuration key-value pairs
        """
        if self._logger:
            self._logger.debug("=" * 50)
            self._logger.debug(f"{title}:")
            for key, value in config.items():
                self._logger.debug(f"  {key}: {value}")
            self._logger.debug("=" * 50)

    async def get_adata(self, data_id: str) -> Any:
        """Get AnnData object directly by ID.

        This is the primary data access method for tools. Returns the AnnData
        object directly without intermediate dict wrapping.

        Args:
            data_id: Dataset identifier

        Returns:
            AnnData object for the dataset

        Raises:
            DataNotFoundError: If dataset not found
        """
        dataset_info = await self._data_manager.get_dataset(data_id)
        return dataset_info["adata"]

    async def get_dataset_info(self, data_id: str) -> dict[str, Any]:
        """Get full dataset info dict when metadata is needed.

        Use this only when you need access to metadata beyond adata,
        such as 'name', 'type', 'source_path', etc.
        """
        return await self._data_manager.get_dataset(data_id)

    async def set_adata(self, data_id: str, adata: Any) -> None:
        """Update the AnnData object for a dataset.

        Use this when preprocessing creates a new adata object (e.g., copy,
        subsample, or format conversion). This updates the reference in the
        data manager's store.

        Args:
            data_id: Dataset identifier
            adata: New AnnData object to store

        Raises:
            DataNotFoundError: If dataset not found
        """
        await self._data_manager.update_adata(data_id, adata)

    async def add_dataset(
        self,
        data_id: str,
        adata: Any,
        name: Optional[str] = None,
        metadata: Optional[dict[str, Any]] = None,
    ) -> None:
        """Add a new dataset to the data store.

        Use this when creating new datasets (e.g., integration results,
        subset data, or derived datasets).

        Args:
            data_id: Unique identifier for the new dataset
            adata: AnnData object to store
            name: Optional display name for the dataset
            metadata: Optional additional metadata dict

        Raises:
            ParameterError: If dataset with same ID already exists
        """
        await self._data_manager.create_dataset(data_id, adata, name, metadata)

    async def info(self, msg: str) -> None:
        """Log info message to MCP context, falling back to the Python logger.

        Fix: previously messages were silently dropped when no MCP context
        was attached, contradicting the documented "graceful fallback".
        """
        if self._mcp_context:
            await self._mcp_context.info(msg)
        elif self._logger:
            self._logger.info(msg)

    async def warning(self, msg: str) -> None:
        """Log warning message to MCP context, falling back to the Python logger."""
        if self._mcp_context:
            await self._mcp_context.warning(msg)
        elif self._logger:
            self._logger.warning(msg)

    async def error(self, msg: str) -> None:
        """Log error message to MCP context, falling back to the Python logger."""
        if self._mcp_context:
            await self._mcp_context.error(msg)
        elif self._logger:
            self._logger.error(msg)

    def get_visualization_registry(self) -> Optional["VisualizationRegistry"]:
        """Get the visualization registry.

        Returns:
            VisualizationRegistry instance if set, None otherwise
        """
        return self._visualization_registry

    def store_visualization(
        self,
        key: str,
        params: VisualizationParameters,
        file_path: Optional[str] = None,
    ) -> None:
        """Store a visualization in the registry.

        No-op when no registry is attached.

        Args:
            key: Cache key format: {data_id}_{plot_type}[_{subtype}]
            params: Visualization parameters for regeneration
            file_path: Optional path to saved image file
        """
        if self._visualization_registry is not None:
            self._visualization_registry.store(key, params, file_path)

    def get_visualization(self, key: str) -> Optional[VisualizationEntry]:
        """Get a visualization entry from the registry.

        Args:
            key: Cache key for the visualization

        Returns:
            VisualizationEntry if found, None otherwise (including when
            no registry is attached)
        """
        if self._visualization_registry is None:
            return None
        return self._visualization_registry.get(key)

    def visualization_exists(self, key: str) -> bool:
        """Check if a visualization exists in registry.

        Args:
            key: Cache key to check

        Returns:
            True if exists, False otherwise (False when no registry attached)
        """
        if self._visualization_registry is None:
            return False
        return self._visualization_registry.exists(key)

    def list_visualizations(self, data_id: str) -> list[str]:
        """List all visualization keys for a dataset.

        Args:
            data_id: Dataset identifier

        Returns:
            List of cache keys matching the dataset (empty when no registry)
        """
        if self._visualization_registry is None:
            return []
        return self._visualization_registry.list_for_dataset(data_id)

    def clear_visualizations(self, prefix: Optional[str] = None) -> int:
        """Clear visualizations from the registry.

        Args:
            prefix: Optional prefix to filter which keys to clear.
                If None, clears all visualizations.

        Returns:
            Number of visualizations cleared (0 when no registry attached)
        """
        if self._visualization_registry is None:
            return 0
        return self._visualization_registry.clear(prefix)
|
|
663
|
+
|
|
664
|
+
|
|
665
|
+
def create_spatial_mcp_server(
    server_name: str = "ChatSpatial",
    data_manager: Optional[DefaultSpatialDataManager] = None,
) -> tuple[FastMCP, SpatialMCPAdapter]:
    """Create and configure a spatial MCP server with adapter.

    Args:
        server_name: Name of the MCP server
        data_manager: Optional custom data manager (uses default if None)

    Returns:
        Tuple of (FastMCP server instance, SpatialMCPAdapter instance)
    """
    # Server-level instructions: guide LLM clients on workflow ordering,
    # operational constraints, and platform-specific tool selection.
    instructions = """ChatSpatial provides spatial transcriptomics analysis through 60+ integrated methods across 15 analytical categories.

CORE WORKFLOW PATTERN:
1. Always start with load_data() to import spatial transcriptomics data
2. Run preprocess_data() before most analytical tools (required for clustering, spatial analysis, etc.)
3. Use visualize_data() to inspect results after each analysis step

CRITICAL OPERATIONAL CONSTRAINTS:
- Preprocessing creates filtered gene sets for efficiency but preserves raw data in adata.raw
- Cell communication analysis automatically uses adata.raw when available for comprehensive gene coverage
- Species-specific parameters are critical: set species="mouse" or "human" and use appropriate resources (e.g., liana_resource="mouseconsensus" for mouse)
- Reference data for annotation methods (tangram, scanvi) must be PREPROCESSED before use

PLATFORM-SPECIFIC GUIDANCE:
- Spot-based platforms (Visium, Slide-seq): Deconvolution is recommended to infer cell type compositions
- Single-cell platforms (MERFISH, Xenium, CosMx): Skip deconvolution - native single-cell resolution provided
- Visium with histology images: Use SpaGCN for spatial domain identification
- High-resolution data without images: Use STAGATE or GraphST

TOOL RELATIONSHIPS:
- Spatial domain identification → Enables spatial statistics (neighborhood enrichment, co-occurrence)
- Cell type annotation → Required for cell communication analysis
- Deconvolution results → Can be used for downstream spatial statistics
- Integration → Recommended before cross-sample comparative analyses

PARAMETER GUIDANCE:
All tools include comprehensive parameter documentation in their schemas. Refer to tool descriptions for default values, platform-specific optimizations, and method-specific requirements.

For multi-step analyses, preserve data_id across operations to maintain analysis continuity."""

    # Build the server, pick (or create) a data manager, and wire the adapter.
    server = FastMCP(server_name, instructions=instructions)
    manager = DefaultSpatialDataManager() if data_manager is None else data_manager
    adapter = SpatialMCPAdapter(server, manager)
    return server, adapter
|