chatspatial 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. chatspatial/__init__.py +11 -0
  2. chatspatial/__main__.py +141 -0
  3. chatspatial/cli/__init__.py +7 -0
  4. chatspatial/config.py +53 -0
  5. chatspatial/models/__init__.py +85 -0
  6. chatspatial/models/analysis.py +513 -0
  7. chatspatial/models/data.py +2462 -0
  8. chatspatial/server.py +1763 -0
  9. chatspatial/spatial_mcp_adapter.py +720 -0
  10. chatspatial/tools/__init__.py +3 -0
  11. chatspatial/tools/annotation.py +1903 -0
  12. chatspatial/tools/cell_communication.py +1603 -0
  13. chatspatial/tools/cnv_analysis.py +605 -0
  14. chatspatial/tools/condition_comparison.py +595 -0
  15. chatspatial/tools/deconvolution/__init__.py +402 -0
  16. chatspatial/tools/deconvolution/base.py +318 -0
  17. chatspatial/tools/deconvolution/card.py +244 -0
  18. chatspatial/tools/deconvolution/cell2location.py +326 -0
  19. chatspatial/tools/deconvolution/destvi.py +144 -0
  20. chatspatial/tools/deconvolution/flashdeconv.py +101 -0
  21. chatspatial/tools/deconvolution/rctd.py +317 -0
  22. chatspatial/tools/deconvolution/spotlight.py +216 -0
  23. chatspatial/tools/deconvolution/stereoscope.py +109 -0
  24. chatspatial/tools/deconvolution/tangram.py +135 -0
  25. chatspatial/tools/differential.py +625 -0
  26. chatspatial/tools/embeddings.py +298 -0
  27. chatspatial/tools/enrichment.py +1863 -0
  28. chatspatial/tools/integration.py +807 -0
  29. chatspatial/tools/preprocessing.py +723 -0
  30. chatspatial/tools/spatial_domains.py +808 -0
  31. chatspatial/tools/spatial_genes.py +836 -0
  32. chatspatial/tools/spatial_registration.py +441 -0
  33. chatspatial/tools/spatial_statistics.py +1476 -0
  34. chatspatial/tools/trajectory.py +495 -0
  35. chatspatial/tools/velocity.py +405 -0
  36. chatspatial/tools/visualization/__init__.py +155 -0
  37. chatspatial/tools/visualization/basic.py +393 -0
  38. chatspatial/tools/visualization/cell_comm.py +699 -0
  39. chatspatial/tools/visualization/cnv.py +320 -0
  40. chatspatial/tools/visualization/core.py +684 -0
  41. chatspatial/tools/visualization/deconvolution.py +852 -0
  42. chatspatial/tools/visualization/enrichment.py +660 -0
  43. chatspatial/tools/visualization/integration.py +205 -0
  44. chatspatial/tools/visualization/main.py +164 -0
  45. chatspatial/tools/visualization/multi_gene.py +739 -0
  46. chatspatial/tools/visualization/persistence.py +335 -0
  47. chatspatial/tools/visualization/spatial_stats.py +469 -0
  48. chatspatial/tools/visualization/trajectory.py +639 -0
  49. chatspatial/tools/visualization/velocity.py +411 -0
  50. chatspatial/utils/__init__.py +115 -0
  51. chatspatial/utils/adata_utils.py +1372 -0
  52. chatspatial/utils/compute.py +327 -0
  53. chatspatial/utils/data_loader.py +499 -0
  54. chatspatial/utils/dependency_manager.py +462 -0
  55. chatspatial/utils/device_utils.py +165 -0
  56. chatspatial/utils/exceptions.py +185 -0
  57. chatspatial/utils/image_utils.py +267 -0
  58. chatspatial/utils/mcp_utils.py +137 -0
  59. chatspatial/utils/path_utils.py +243 -0
  60. chatspatial/utils/persistence.py +78 -0
  61. chatspatial/utils/scipy_compat.py +143 -0
  62. chatspatial-1.1.0.dist-info/METADATA +242 -0
  63. chatspatial-1.1.0.dist-info/RECORD +67 -0
  64. chatspatial-1.1.0.dist-info/WHEEL +5 -0
  65. chatspatial-1.1.0.dist-info/entry_points.txt +2 -0
  66. chatspatial-1.1.0.dist-info/licenses/LICENSE +21 -0
  67. chatspatial-1.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,720 @@
1
+ """
2
+ Spatial MCP Adapter for ChatSpatial
3
+
4
+ This module provides a clean abstraction layer between MCP protocol requirements
5
+ and ChatSpatial's spatial analysis functionality.
6
+
7
+ Design Principles:
8
+ - Single source of truth: One registry for visualization state
9
+ - Store params, not bytes: Images can be regenerated on demand
10
+ - LRU eviction: Automatic memory management
11
+ """
12
+
13
+ import logging
14
+ import os
15
+ import time
16
+ from collections import OrderedDict
17
+ from dataclasses import dataclass, field
18
+ from typing import Any, Optional
19
+
20
+ from mcp.server.fastmcp import Context, FastMCP
21
+ from mcp.types import ToolAnnotations
22
+
23
+ # Import MCP improvements
24
+ from .models.data import VisualizationParameters
25
+ from .utils.exceptions import DataNotFoundError, ParameterError
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+
30
# =============================================================================
# TOOL ANNOTATIONS - Single Source of Truth
# =============================================================================
# These annotations are passed to FastMCP's @mcp.tool() decorator to inform
# LLM clients about tool behavior characteristics.
#
# Annotation meanings (from MCP spec):
# - readOnlyHint: Tool only reads data, doesn't modify state
# - idempotentHint: Repeated calls with same args have no additional effect
# - openWorldHint: Tool may interact with external entities (network, files)
#
# NOTE: These hints are advisory metadata surfaced to clients; nothing here
# is enforced at runtime. Keep entries in sync with the tools registered on
# the server (unknown tools fall back to conservative defaults via
# get_tool_annotations()).
# =============================================================================

TOOL_ANNOTATIONS: dict[str, ToolAnnotations] = {
    # Data I/O tools
    "load_data": ToolAnnotations(
        readOnlyHint=True,  # Reads from filesystem, doesn't modify data
        idempotentHint=True,  # Loading same file yields same result
        openWorldHint=True,  # Accesses filesystem
    ),
    "save_data": ToolAnnotations(
        readOnlyHint=False,  # Writes to filesystem
        idempotentHint=True,  # Saving same data to same path is idempotent
        openWorldHint=True,  # Accesses filesystem
    ),
    # Preprocessing - modifies data in-place
    "preprocess_data": ToolAnnotations(
        readOnlyHint=False,  # Modifies adata in-place
        idempotentHint=False,  # Re-running changes state
    ),
    # Visualization - read-only analysis
    "visualize_data": ToolAnnotations(
        readOnlyHint=True,  # Only reads data to generate plots
        idempotentHint=True,  # Same params yield same plot
    ),
    "save_visualization": ToolAnnotations(
        readOnlyHint=False,  # Writes to filesystem
        idempotentHint=True,  # Saving same viz is idempotent
        openWorldHint=True,  # Accesses filesystem
    ),
    "export_all_visualizations": ToolAnnotations(
        readOnlyHint=False,
        idempotentHint=True,
        openWorldHint=True,
    ),
    "clear_visualization_cache": ToolAnnotations(
        readOnlyHint=False,  # Clears cache
        idempotentHint=True,  # Clearing empty cache is idempotent
    ),
    # Analysis tools - modify adata by adding results
    "annotate_cell_types": ToolAnnotations(
        readOnlyHint=False,  # Adds cell type annotations to adata.obs
        idempotentHint=False,  # Re-running may yield different results
        openWorldHint=True,  # May use external references
    ),
    "analyze_spatial_statistics": ToolAnnotations(
        readOnlyHint=False,  # Adds statistics to adata.uns
        idempotentHint=True,  # Same params yield same statistics
    ),
    "find_markers": ToolAnnotations(
        readOnlyHint=True,  # Computes markers without modifying adata
        idempotentHint=True,  # Deterministic computation
    ),
    "analyze_velocity_data": ToolAnnotations(
        readOnlyHint=False,  # Adds velocity to adata
        idempotentHint=False,  # Stochastic methods
    ),
    "analyze_trajectory_data": ToolAnnotations(
        readOnlyHint=False,  # Adds trajectory info to adata
        idempotentHint=False,  # May have stochastic elements
    ),
    "integrate_samples": ToolAnnotations(
        readOnlyHint=False,  # Creates new integrated dataset
        idempotentHint=False,  # Creates new dataset each time
    ),
    "deconvolve_data": ToolAnnotations(
        readOnlyHint=False,  # Adds deconvolution results to adata
        idempotentHint=False,  # Deep learning methods are stochastic
        openWorldHint=True,  # May use external references
    ),
    "identify_spatial_domains": ToolAnnotations(
        readOnlyHint=False,  # Adds domain labels to adata.obs
        idempotentHint=False,  # Clustering can vary
    ),
    "analyze_cell_communication": ToolAnnotations(
        readOnlyHint=False,  # Adds communication results to adata.uns
        idempotentHint=True,  # Deterministic given same inputs
        openWorldHint=True,  # Uses LR databases
    ),
    "analyze_enrichment": ToolAnnotations(
        readOnlyHint=False,  # Adds enrichment scores to adata
        idempotentHint=True,  # Deterministic
    ),
    "find_spatial_genes": ToolAnnotations(
        readOnlyHint=False,  # Adds spatial gene info to adata.var
        idempotentHint=True,  # Deterministic methods
    ),
    "analyze_cnv": ToolAnnotations(
        readOnlyHint=False,  # Adds CNV results to adata
        idempotentHint=True,  # Deterministic
    ),
    "register_spatial_data": ToolAnnotations(
        readOnlyHint=False,  # Modifies spatial coordinates
        idempotentHint=False,  # Registration can vary
    ),
}
135
+
136
+
137
def get_tool_annotations(tool_name: str) -> ToolAnnotations:
    """Look up the behavior annotations registered for a tool.

    Args:
        tool_name: Name of the tool (e.g., 'load_data', 'preprocess_data')

    Returns:
        The ToolAnnotations registered in TOOL_ANNOTATIONS. Tools not in
        the registry receive conservative defaults (assumed to modify
        state and to be non-idempotent).

    Usage:
        @mcp.tool(annotations=get_tool_annotations("load_data"))
        async def load_data(...): ...
    """
    # Safer to over-warn than under-warn: unknown tools are treated as
    # state-mutating and non-idempotent.
    fallback = ToolAnnotations(readOnlyHint=False, idempotentHint=False)
    return TOOL_ANNOTATIONS.get(tool_name, fallback)
156
+
157
+
158
+ # =============================================================================
159
+ # VISUALIZATION REGISTRY - Single Source of Truth
160
+ # =============================================================================
161
+ # First Principles Design:
162
+ # - Store params, not bytes: Images can be regenerated on demand from params
163
+ # - LRU eviction: Automatic memory management prevents unbounded growth
164
+ # - Unified cache key: {data_id}_{plot_type}[_{subtype}]
165
+ # =============================================================================
166
+
167
+
168
@dataclass
class VisualizationEntry:
    """A single visualization entry in the registry.

    Stores the parameters needed to regenerate a visualization, not the image
    bytes. This follows the first principle: data + params = reproducible
    output.
    """

    # Parameters sufficient to regenerate the plot on demand.
    # (Annotation is quoted to avoid eager evaluation at class-creation time.)
    params: "VisualizationParameters"
    # Path to a saved PNG on disk, set only when a large image was persisted.
    file_path: Optional[str] = None
    # Creation/update time for inspection; LRU order is tracked by the
    # registry's OrderedDict, not by this timestamp.
    timestamp: float = field(default_factory=time.time)


class VisualizationRegistry:
    """Single source of truth for visualization state.

    Design Principles:
    - Store params, not bytes: Images regenerated on demand
    - LRU eviction: Oldest entries removed when max_entries exceeded
    - Unified interface: One place for all visualization state
    """

    def __init__(self, max_entries: int = 100):
        """Initialize the registry with LRU capacity.

        Args:
            max_entries: Maximum number of entries before LRU eviction
        """
        # OrderedDict gives O(1) LRU bookkeeping via move_to_end/popitem.
        self._entries: OrderedDict[str, VisualizationEntry] = OrderedDict()
        self._max_entries = max_entries

    @staticmethod
    def _remove_backing_file(entry: VisualizationEntry) -> None:
        """Best-effort deletion of an entry's saved image file, if any.

        Shared by eviction and the clear() paths so the cleanup policy
        (silently ignore OS errors) lives in exactly one place.
        """
        if entry.file_path and os.path.exists(entry.file_path):
            try:
                os.remove(entry.file_path)
            except OSError:
                # Cleanup is best-effort; a failed delete must not break
                # registry operations.
                pass

    def store(
        self,
        key: str,
        params: "VisualizationParameters",
        file_path: Optional[str] = None,
    ) -> None:
        """Store a visualization entry.

        Args:
            key: Cache key format: {data_id}_{plot_type}[_{subtype}]
            params: Visualization parameters for regeneration
            file_path: Optional path to saved image file
        """
        # Move to end if exists (LRU behavior)
        if key in self._entries:
            self._entries.move_to_end(key)

        self._entries[key] = VisualizationEntry(
            params=params,
            file_path=file_path,
            timestamp=time.time(),
        )

        # LRU eviction: drop oldest entries (and their files) beyond capacity.
        while len(self._entries) > self._max_entries:
            _oldest_key, oldest_entry = self._entries.popitem(last=False)
            self._remove_backing_file(oldest_entry)

    def get(self, key: str) -> Optional[VisualizationEntry]:
        """Get a visualization entry.

        Args:
            key: Cache key to look up

        Returns:
            VisualizationEntry if found, None otherwise
        """
        if key in self._entries:
            self._entries.move_to_end(key)  # LRU touch
            return self._entries[key]
        return None

    def exists(self, key: str) -> bool:
        """Check if a visualization exists in registry."""
        return key in self._entries

    def list_for_dataset(self, data_id: str) -> list[str]:
        """List all visualization keys for a dataset.

        Args:
            data_id: Dataset identifier

        Returns:
            List of cache keys matching the dataset
        """
        prefix = f"{data_id}_"
        return [k for k in self._entries.keys() if k.startswith(prefix)]

    def clear(self, prefix: Optional[str] = None) -> int:
        """Clear visualizations from registry.

        Args:
            prefix: Optional prefix to filter which keys to clear.
                    If None, clears all visualizations.

        Returns:
            Number of entries cleared
        """
        if prefix is None:
            count = len(self._entries)
            # Clean up all backing files before dropping the entries.
            for entry in self._entries.values():
                self._remove_backing_file(entry)
            self._entries.clear()
            return count

        keys_to_remove = [k for k in self._entries if k.startswith(prefix)]
        for key in keys_to_remove:
            self._remove_backing_file(self._entries.pop(key))
        return len(keys_to_remove)

    def keys(self) -> list[str]:
        """Return all keys in the registry."""
        return list(self._entries.keys())
297
+
298
+
299
class SpatialMCPAdapter:
    """Bridge between the MCP server and ChatSpatial's analysis layer.

    Deliberately thin: it only wires together the FastMCP server, the data
    manager, and a bounded visualization registry. (A former ResourceManager
    was removed because it was never registered to the MCP server.)
    """

    def __init__(self, mcp_server: FastMCP, data_manager: "DefaultSpatialDataManager"):
        # Keep references only; the actual work happens in the tools.
        self.data_manager = data_manager
        self.visualization_registry = VisualizationRegistry(max_entries=100)
        self.mcp = mcp_server
310
+
311
+
312
class DefaultSpatialDataManager:
    """Default in-memory implementation of spatial data management."""

    def __init__(self):
        # data_id -> dataset info dict (holds "adata" plus optional metadata).
        self.data_store: dict[str, Any] = {}
        # Monotonic counter used to mint sequential dataset IDs.
        self._next_id = 1

    async def load_dataset(
        self, path: str, data_type: str, name: Optional[str] = None
    ) -> str:
        """Load a spatial dataset and return its generated ID."""
        from .utils.data_loader import load_spatial_data

        dataset_info = await load_spatial_data(path, data_type, name)

        # Mint a sequential identifier and register the dataset under it.
        data_id = f"data_{self._next_id}"
        self._next_id += 1
        self.data_store[data_id] = dataset_info
        return data_id

    async def get_dataset(self, data_id: str) -> Any:
        """Return the dataset info dict for data_id.

        Raises:
            DataNotFoundError: If the dataset is not loaded.
        """
        if data_id in self.data_store:
            return self.data_store[data_id]
        raise DataNotFoundError(f"Dataset {data_id} not found")

    async def list_datasets(self) -> list[dict[str, Any]]:
        """Return a summary dict for every loaded dataset."""

        def summarize(data_id: str, info: Any) -> dict[str, Any]:
            # Missing metadata degrades gracefully to defaults.
            return {
                "id": data_id,
                "name": info.get("name", f"Dataset {data_id}"),
                "type": info.get("type", "unknown"),
                "n_cells": info.get("n_cells", 0),
                "n_genes": info.get("n_genes", 0),
            }

        return [summarize(d, i) for d, i in self.data_store.items()]

    async def save_result(self, data_id: str, result_type: str, result: Any) -> None:
        """Attach an analysis result to an existing dataset.

        Raises:
            DataNotFoundError: If the dataset is not loaded.
        """
        if data_id not in self.data_store:
            raise DataNotFoundError(f"Dataset {data_id} not found")
        # Lazily create the per-dataset results bucket.
        self.data_store[data_id].setdefault("results", {})[result_type] = result

    async def get_result(self, data_id: str, result_type: str) -> Any:
        """Fetch a previously saved analysis result.

        Raises:
            DataNotFoundError: If the dataset or the result type is missing.
        """
        if data_id not in self.data_store:
            raise DataNotFoundError(f"Dataset {data_id} not found")

        results = self.data_store[data_id].get("results", {})
        if result_type in results:
            return results[result_type]
        raise DataNotFoundError(
            f"No {result_type} results found for dataset {data_id}"
        )

    def dataset_exists(self, data_id: str) -> bool:
        """Check if a dataset exists.

        Args:
            data_id: Dataset identifier

        Returns:
            True if the dataset exists, False otherwise
        """
        return data_id in self.data_store

    async def update_adata(self, data_id: str, adata: Any) -> None:
        """Replace the adata object of an existing dataset.

        Use this when preprocessing creates a new adata object (e.g., copy,
        subsample, or format conversion).

        Args:
            data_id: Dataset identifier
            adata: New AnnData object to store

        Raises:
            DataNotFoundError: If dataset not found
        """
        if data_id not in self.data_store:
            raise DataNotFoundError(f"Dataset {data_id} not found")
        self.data_store[data_id]["adata"] = adata

    async def create_dataset(
        self,
        data_id: str,
        adata: Any,
        name: Optional[str] = None,
        metadata: Optional[dict[str, Any]] = None,
    ) -> None:
        """Register a brand-new dataset under a caller-chosen ID.

        Use this when creating derived datasets (e.g., integration results,
        subset data).

        Args:
            data_id: Unique identifier for the new dataset
            adata: AnnData object to store
            name: Optional display name for the dataset
            metadata: Optional additional metadata dict

        Raises:
            ParameterError: If dataset with same ID already exists
        """
        if data_id in self.data_store:
            raise ParameterError(
                f"Dataset {data_id} already exists. Use update_adata() to update."
            )
        entry: dict[str, Any] = {"adata": adata}
        if name:
            entry["name"] = name
        if metadata:
            entry.update(metadata)
        self.data_store[data_id] = entry
438
+
439
+
440
@dataclass
class ToolContext:
    """Unified context for ChatSpatial tool execution.

    This class provides a clean interface for tools to access data and logging
    without the redundant data_store dict wrapping pattern.

    Design Rationale:
    - Python dict assignment is reference, not copy. The old pattern of wrapping
      dataset_info in a temp dict and "writing back" was completely unnecessary.
    - Tools should access adata directly via get_adata(), not through dict wrapping.
    - Logging methods fall back gracefully when MCP context is unavailable.

    Logging Strategy:
    - User-visible messages: await ctx.info(), await ctx.warning(), await ctx.error()
      These appear in Claude's conversation and provide user-friendly progress updates.
    - Developer debugging: ctx.debug()
      This writes to Python logger for debugging, not visible to users.

    Usage:
        async def my_tool(data_id: str, ctx: ToolContext, params: Params) -> Result:
            adata = await ctx.get_adata(data_id)
            await ctx.info(f"Processing {adata.n_obs} cells")  # User sees this
            ctx.debug(f"Internal state: {some_detail}")  # Developer log only
            # ... analysis logic ...
            return result
    """

    # Backing data manager; all dataset access is delegated to it.
    _data_manager: "DefaultSpatialDataManager"
    # Live MCP request context, when running inside an MCP call; the async
    # logging methods silently no-op when this is None.
    _mcp_context: Optional[Context] = None
    # Shared registry for visualization params; visualization helpers return
    # empty/None/0 defaults when this is None.
    _visualization_registry: Optional["VisualizationRegistry"] = None
    # Developer-facing logger; lazily defaulted in __post_init__.
    _logger: Optional[logging.Logger] = field(default=None, repr=False)

    def __post_init__(self) -> None:
        """Initialize the logger for debug messages."""
        if self._logger is None:
            self._logger = logging.getLogger("chatspatial.tools")

    def debug(self, msg: str) -> None:
        """Log debug message for developers (not visible to users).

        Use this for detailed technical information that helps with debugging
        but would be noise for end users. These messages go to Python logger.

        Args:
            msg: Debug message to log
        """
        if self._logger:
            self._logger.debug(msg)

    def log_config(self, title: str, config: dict[str, Any]) -> None:
        """Log configuration details for developers.

        Convenience method for logging parameter configurations in a
        structured format. Goes to Python logger, not user-visible.

        Args:
            title: Configuration section title
            config: Dictionary of configuration key-value pairs
        """
        if self._logger:
            self._logger.debug("=" * 50)
            self._logger.debug(f"{title}:")
            for key, value in config.items():
                self._logger.debug(f"  {key}: {value}")
            self._logger.debug("=" * 50)

    async def get_adata(self, data_id: str) -> Any:
        """Get AnnData object directly by ID.

        This is the primary data access method for tools. Returns the AnnData
        object directly without intermediate dict wrapping.

        Args:
            data_id: Dataset identifier

        Returns:
            AnnData object for the dataset

        Raises:
            DataNotFoundError: If dataset not found (raised by the data
                manager's get_dataset()).
        """
        dataset_info = await self._data_manager.get_dataset(data_id)
        return dataset_info["adata"]

    async def get_dataset_info(self, data_id: str) -> dict[str, Any]:
        """Get full dataset info dict when metadata is needed.

        Use this only when you need access to metadata beyond adata,
        such as 'name', 'type', 'source_path', etc.
        """
        return await self._data_manager.get_dataset(data_id)

    async def set_adata(self, data_id: str, adata: Any) -> None:
        """Update the AnnData object for a dataset.

        Use this when preprocessing creates a new adata object (e.g., copy,
        subsample, or format conversion). This updates the reference in the
        data manager's store.

        Args:
            data_id: Dataset identifier
            adata: New AnnData object to store

        Raises:
            DataNotFoundError: If dataset not found (raised by the data
                manager's update_adata()).
        """
        await self._data_manager.update_adata(data_id, adata)

    async def add_dataset(
        self,
        data_id: str,
        adata: Any,
        name: Optional[str] = None,
        metadata: Optional[dict[str, Any]] = None,
    ) -> None:
        """Add a new dataset to the data store.

        Use this when creating new datasets (e.g., integration results,
        subset data, or derived datasets).

        Args:
            data_id: Unique identifier for the new dataset
            adata: AnnData object to store
            name: Optional display name for the dataset
            metadata: Optional additional metadata dict

        Raises:
            ParameterError: If dataset with same ID already exists (raised
                by the data manager's create_dataset()).
        """
        await self._data_manager.create_dataset(data_id, adata, name, metadata)

    async def info(self, msg: str) -> None:
        """Log info message to MCP context if available (no-op otherwise)."""
        if self._mcp_context:
            await self._mcp_context.info(msg)

    async def warning(self, msg: str) -> None:
        """Log warning message to MCP context if available (no-op otherwise)."""
        if self._mcp_context:
            await self._mcp_context.warning(msg)

    async def error(self, msg: str) -> None:
        """Log error message to MCP context if available (no-op otherwise)."""
        if self._mcp_context:
            await self._mcp_context.error(msg)

    def get_visualization_registry(self) -> Optional["VisualizationRegistry"]:
        """Get the visualization registry.

        Returns:
            VisualizationRegistry instance if set, None otherwise
        """
        return self._visualization_registry

    def store_visualization(
        self,
        key: str,
        params: VisualizationParameters,
        file_path: Optional[str] = None,
    ) -> None:
        """Store a visualization in the registry.

        Silently does nothing when no registry is attached.

        Args:
            key: Cache key format: {data_id}_{plot_type}[_{subtype}]
            params: Visualization parameters for regeneration
            file_path: Optional path to saved image file
        """
        if self._visualization_registry is not None:
            self._visualization_registry.store(key, params, file_path)

    def get_visualization(self, key: str) -> Optional[VisualizationEntry]:
        """Get a visualization entry from the registry.

        Args:
            key: Cache key for the visualization

        Returns:
            VisualizationEntry if found, None otherwise (also None when no
            registry is attached)
        """
        if self._visualization_registry is None:
            return None
        return self._visualization_registry.get(key)

    def visualization_exists(self, key: str) -> bool:
        """Check if a visualization exists in registry.

        Args:
            key: Cache key to check

        Returns:
            True if exists, False otherwise (False when no registry attached)
        """
        if self._visualization_registry is None:
            return False
        return self._visualization_registry.exists(key)

    def list_visualizations(self, data_id: str) -> list[str]:
        """List all visualization keys for a dataset.

        Args:
            data_id: Dataset identifier

        Returns:
            List of cache keys matching the dataset (empty when no registry
            is attached)
        """
        if self._visualization_registry is None:
            return []
        return self._visualization_registry.list_for_dataset(data_id)

    def clear_visualizations(self, prefix: Optional[str] = None) -> int:
        """Clear visualizations from the registry.

        Args:
            prefix: Optional prefix to filter which keys to clear.
                    If None, clears all visualizations.

        Returns:
            Number of visualizations cleared (0 when no registry is attached)
        """
        if self._visualization_registry is None:
            return 0
        return self._visualization_registry.clear(prefix)
663
+
664
+
665
def create_spatial_mcp_server(
    server_name: str = "ChatSpatial",
    data_manager: Optional[DefaultSpatialDataManager] = None,
) -> tuple[FastMCP, SpatialMCPAdapter]:
    """Create and configure a spatial MCP server plus its adapter.

    Args:
        server_name: Name of the MCP server
        data_manager: Optional custom data manager (uses default if None)

    Returns:
        Tuple of (FastMCP server instance, SpatialMCPAdapter instance)
    """
    # Server-level instructions that guide LLM clients on tool usage.
    instructions = """ChatSpatial provides spatial transcriptomics analysis through 60+ integrated methods across 15 analytical categories.

CORE WORKFLOW PATTERN:
1. Always start with load_data() to import spatial transcriptomics data
2. Run preprocess_data() before most analytical tools (required for clustering, spatial analysis, etc.)
3. Use visualize_data() to inspect results after each analysis step

CRITICAL OPERATIONAL CONSTRAINTS:
- Preprocessing creates filtered gene sets for efficiency but preserves raw data in adata.raw
- Cell communication analysis automatically uses adata.raw when available for comprehensive gene coverage
- Species-specific parameters are critical: set species="mouse" or "human" and use appropriate resources (e.g., liana_resource="mouseconsensus" for mouse)
- Reference data for annotation methods (tangram, scanvi) must be PREPROCESSED before use

PLATFORM-SPECIFIC GUIDANCE:
- Spot-based platforms (Visium, Slide-seq): Deconvolution is recommended to infer cell type compositions
- Single-cell platforms (MERFISH, Xenium, CosMx): Skip deconvolution - native single-cell resolution provided
- Visium with histology images: Use SpaGCN for spatial domain identification
- High-resolution data without images: Use STAGATE or GraphST

TOOL RELATIONSHIPS:
- Spatial domain identification → Enables spatial statistics (neighborhood enrichment, co-occurrence)
- Cell type annotation → Required for cell communication analysis
- Deconvolution results → Can be used for downstream spatial statistics
- Integration → Recommended before cross-sample comparative analyses

PARAMETER GUIDANCE:
All tools include comprehensive parameter documentation in their schemas. Refer to tool descriptions for default values, platform-specific optimizations, and method-specific requirements.

For multi-step analyses, preserve data_id across operations to maintain analysis continuity."""

    # Build the server, fall back to the default data manager, and wire
    # everything through the adapter.
    server = FastMCP(server_name, instructions=instructions)
    manager = data_manager if data_manager is not None else DefaultSpatialDataManager()
    adapter = SpatialMCPAdapter(server, manager)

    return server, adapter