chatspatial-1.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. chatspatial/__init__.py +11 -0
  2. chatspatial/__main__.py +141 -0
  3. chatspatial/cli/__init__.py +7 -0
  4. chatspatial/config.py +53 -0
  5. chatspatial/models/__init__.py +85 -0
  6. chatspatial/models/analysis.py +513 -0
  7. chatspatial/models/data.py +2462 -0
  8. chatspatial/server.py +1763 -0
  9. chatspatial/spatial_mcp_adapter.py +720 -0
  10. chatspatial/tools/__init__.py +3 -0
  11. chatspatial/tools/annotation.py +1903 -0
  12. chatspatial/tools/cell_communication.py +1603 -0
  13. chatspatial/tools/cnv_analysis.py +605 -0
  14. chatspatial/tools/condition_comparison.py +595 -0
  15. chatspatial/tools/deconvolution/__init__.py +402 -0
  16. chatspatial/tools/deconvolution/base.py +318 -0
  17. chatspatial/tools/deconvolution/card.py +244 -0
  18. chatspatial/tools/deconvolution/cell2location.py +326 -0
  19. chatspatial/tools/deconvolution/destvi.py +144 -0
  20. chatspatial/tools/deconvolution/flashdeconv.py +101 -0
  21. chatspatial/tools/deconvolution/rctd.py +317 -0
  22. chatspatial/tools/deconvolution/spotlight.py +216 -0
  23. chatspatial/tools/deconvolution/stereoscope.py +109 -0
  24. chatspatial/tools/deconvolution/tangram.py +135 -0
  25. chatspatial/tools/differential.py +625 -0
  26. chatspatial/tools/embeddings.py +298 -0
  27. chatspatial/tools/enrichment.py +1863 -0
  28. chatspatial/tools/integration.py +807 -0
  29. chatspatial/tools/preprocessing.py +723 -0
  30. chatspatial/tools/spatial_domains.py +808 -0
  31. chatspatial/tools/spatial_genes.py +836 -0
  32. chatspatial/tools/spatial_registration.py +441 -0
  33. chatspatial/tools/spatial_statistics.py +1476 -0
  34. chatspatial/tools/trajectory.py +495 -0
  35. chatspatial/tools/velocity.py +405 -0
  36. chatspatial/tools/visualization/__init__.py +155 -0
  37. chatspatial/tools/visualization/basic.py +393 -0
  38. chatspatial/tools/visualization/cell_comm.py +699 -0
  39. chatspatial/tools/visualization/cnv.py +320 -0
  40. chatspatial/tools/visualization/core.py +684 -0
  41. chatspatial/tools/visualization/deconvolution.py +852 -0
  42. chatspatial/tools/visualization/enrichment.py +660 -0
  43. chatspatial/tools/visualization/integration.py +205 -0
  44. chatspatial/tools/visualization/main.py +164 -0
  45. chatspatial/tools/visualization/multi_gene.py +739 -0
  46. chatspatial/tools/visualization/persistence.py +335 -0
  47. chatspatial/tools/visualization/spatial_stats.py +469 -0
  48. chatspatial/tools/visualization/trajectory.py +639 -0
  49. chatspatial/tools/visualization/velocity.py +411 -0
  50. chatspatial/utils/__init__.py +115 -0
  51. chatspatial/utils/adata_utils.py +1372 -0
  52. chatspatial/utils/compute.py +327 -0
  53. chatspatial/utils/data_loader.py +499 -0
  54. chatspatial/utils/dependency_manager.py +462 -0
  55. chatspatial/utils/device_utils.py +165 -0
  56. chatspatial/utils/exceptions.py +185 -0
  57. chatspatial/utils/image_utils.py +267 -0
  58. chatspatial/utils/mcp_utils.py +137 -0
  59. chatspatial/utils/path_utils.py +243 -0
  60. chatspatial/utils/persistence.py +78 -0
  61. chatspatial/utils/scipy_compat.py +143 -0
  62. chatspatial-1.1.0.dist-info/METADATA +242 -0
  63. chatspatial-1.1.0.dist-info/RECORD +67 -0
  64. chatspatial-1.1.0.dist-info/WHEEL +5 -0
  65. chatspatial-1.1.0.dist-info/entry_points.txt +2 -0
  66. chatspatial-1.1.0.dist-info/licenses/LICENSE +21 -0
  67. chatspatial-1.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,513 @@
1
+ """
2
+ Analysis result models for spatial transcriptomics data.
3
+ """
4
+
5
+ from typing import TYPE_CHECKING, Any, Optional
6
+
7
+ from pydantic import BaseModel, ConfigDict, Field
8
+
9
+ if TYPE_CHECKING:
10
+ from mcp.types import ImageContent
11
+ else:
12
+ try:
13
+ from mcp.types import ImageContent
14
+ except ImportError:
15
+ # Fallback for when MCP is not available
16
+ ImageContent = Any # type: ignore[misc,assignment]
17
+
18
+
19
class BaseAnalysisResult(BaseModel):
    """Common parent of every analysis result model.

    Carries the pydantic configuration shared by all result models and is
    the place for optional fields common to them. Concrete result models
    are expected to subclass this class.
    """

    # Let subclasses declare fields whose types pydantic cannot validate natively.
    model_config = ConfigDict(arbitrary_types_allowed=True)
27
+
28
+
29
class PreprocessingResult(BaseAnalysisResult):
    """Outcome of a data preprocessing run."""

    # Identifier of the preprocessed dataset.
    data_id: str

    # Size summary of the dataset.
    # NOTE(review): n_hvgs presumably counts highly variable genes and
    # clusters the number of clusters found — confirm against the producer.
    n_cells: int
    n_genes: int
    n_hvgs: int
    clusters: int

    # Optional quality-control metrics; None when not supplied.
    qc_metrics: Optional[dict[str, Any]] = None
38
+
39
+
40
class DifferentialExpressionResult(BaseAnalysisResult):
    """Outcome of a differential expression analysis.

    Serialization:
        In line with the other result models, the ``statistics`` dict is
        left out of JSON serialization; the key summary information lives
        in explicit fields.

    Included in the MCP response:
        - data_id, comparison (basic info)
        - n_genes (count)
        - top_genes (top differentially expressed genes)

    Excluded from the MCP response:
        - statistics (detailed DE metrics per group)
    """

    data_id: str
    comparison: str
    n_genes: int
    top_genes: list[str] = Field(default_factory=list)

    # Detailed metrics: kept on the object, but never serialized to the LLM.
    statistics: dict[str, Any] = Field(default_factory=dict, exclude=True)
66
+
67
+
68
class AnnotationResult(BaseAnalysisResult):
    """Outcome of cell type annotation.

    Attributes:
        data_id: Identifier of the dataset.
        method: Name of the annotation method that was used.
        output_key: adata.obs column holding the cell types
            (e.g., "cell_type_tangram").
        confidence_key: adata.obs column holding the confidence scores
            (e.g., "confidence_tangram").
        cell_types: Unique cell types that were identified.
        counts: Number of cells for each cell type.
        confidence_scores: Per-cell-type confidence scores, when available.
            An empty dict or None means no confidence data is available.
            Holds only real statistical measures, never arbitrary values.
        tangram_mapping_score: Overall mapping quality score (Tangram method).
    """

    data_id: str
    method: str

    # Column name where cell types are stored.
    output_key: str

    # Column name where confidence scores are stored.
    confidence_key: Optional[str] = None

    cell_types: list[str]
    counts: dict[str, int]
    confidence_scores: Optional[dict[str, float]] = None

    # Mapping score, relevant for the Tangram method.
    tangram_mapping_score: Optional[float] = None
94
+
95
+
96
class SpatialStatisticsResult(BaseAnalysisResult):
    """Outcome of a spatial analysis.

    Serialization:
        Detailed per-gene/per-spot statistics are declared with
        ``Field(exclude=True)`` so they stay out of JSON serialization and
        keep the MCP response small. Summary fields are always included.

    Included in the MCP response:
        - data_id, analysis_type (basic info)
        - n_features_analyzed, n_significant (summary counts)
        - top_features (top significant genes/clusters)
        - summary_metrics (compact key metrics)
        - results_key (for accessing full results)

    Excluded from the MCP response (stored in adata):
        - statistics (full detailed results dict)

    Visualization is handled separately through the visualize_data tool.
    """

    data_id: str
    analysis_type: str

    # --- Summary: always part of the MCP response ---
    n_features_analyzed: int = 0
    n_significant: int = 0
    top_features: list[str] = Field(default_factory=list)
    summary_metrics: dict[str, float] = Field(default_factory=dict)

    # adata.uns key under which the full results can be found.
    results_key: Optional[str] = None

    # --- Details: kept on the object, never serialized to the LLM ---
    statistics: Optional[dict[str, Any]] = Field(default=None, exclude=True)
132
+
133
+
134
class RNAVelocityResult(BaseAnalysisResult):
    """Outcome of an RNA velocity analysis."""

    data_id: str
    velocity_computed: bool

    # adata.uns key of the velocity graph, if any.
    velocity_graph_key: Optional[str] = None

    # Computation mode used for RNA velocity.
    mode: str
141
+
142
+
143
class TrajectoryResult(BaseAnalysisResult):
    """Outcome of a trajectory analysis."""

    data_id: str
    pseudotime_computed: bool
    velocity_computed: bool
    pseudotime_key: str

    # Trajectory analysis method that was used.
    method: str

    # Weight given to spatial information.
    spatial_weight: float
152
+
153
+
154
class IntegrationResult(BaseAnalysisResult):
    """Outcome of integrating multiple samples."""

    # Identifier of the dataset.
    data_id: str
    # Number of samples.
    n_samples: int
    # Name of the integration method.
    integration_method: str
160
+
161
+
162
class DeconvolutionResult(BaseAnalysisResult):
    """Outcome of spatial deconvolution.

    Serialization:
        Detailed per-cell-type statistics are declared with
        ``Field(exclude=True)`` so they stay out of JSON serialization and
        keep the MCP response small.

    Included in the MCP response:
        - data_id, method, n_cell_types, cell_types (basic info)
        - n_spots, genes_used (summary counts)
        - dominant_type_key, proportions_key (storage keys)

    Excluded from the MCP response (stored in adata):
        - statistics (includes mean_proportions, dominant_types dicts)
    """

    data_id: str
    method: str

    # Column name holding the dominant cell type.
    dominant_type_key: str

    cell_types: list[str]
    n_cell_types: int

    # adata.obsm key holding the cell type proportions.
    proportions_key: str

    # --- Summary: always included ---
    n_spots: int = 0
    genes_used: int = 0

    # --- Details: kept on the object, never serialized to the LLM ---
    statistics: dict[str, Any] = Field(default_factory=dict, exclude=True)
194
+
195
+
196
class SpatialDomainResult(BaseAnalysisResult):
    """Outcome of spatial domain identification.

    Serialization:
        In line with the other result models, the detailed ``statistics``
        dict is left out of JSON serialization; the key summary information
        lives in explicit fields.

    Included in the MCP response:
        - data_id, method, n_domains (basic info)
        - domain_key, refined_domain_key, embeddings_key (storage keys)
        - domain_counts (number of spots per domain - typically compact)

    Excluded from the MCP response:
        - statistics (method parameters, stored in adata.uns)
    """

    data_id: str
    method: str
    n_domains: int

    # adata.obs key holding the domain labels.
    domain_key: str

    # Spot count for each domain.
    domain_counts: dict[str, int]

    # Key of the refined domains, set only when refinement was applied.
    refined_domain_key: Optional[str] = None

    # adata.obsm key holding the embeddings.
    embeddings_key: Optional[str] = None

    # --- Details: kept on the object, never serialized to the LLM ---
    statistics: dict[str, Any] = Field(default_factory=dict, exclude=True)
229
+
230
+
231
class SpatialVariableGenesResult(BaseAnalysisResult):
    """Outcome of spatially variable gene identification.

    Serialization:
        Detailed statistics are declared with ``Field(exclude=True)`` so
        they stay out of JSON serialization and keep the MCP response
        small. They remain on the Python object and are saved to adata.var
        for downstream visualization and export.

    Complete statistics are available through:
        - adata.var['spatialde_pval'], adata.var['spatialde_qval'] (SpatialDE)
        - adata.var['sparkx_pval'], adata.var['sparkx_qval'] (SPARK-X)
    """

    data_id: str

    # Method used for the analysis.
    method: str

    # --- Summary statistics: always returned to the LLM ---
    # Total number of genes analyzed.
    n_genes_analyzed: int
    # Total number of significant genes found (q < 0.05).
    n_significant_genes: int

    # Top spatial genes, returned to the LLM (truncated for token efficiency).
    spatial_genes: list[str]

    # Storage key giving access to the full results in adata.
    results_key: str

    # --- Excluded from the MCP response (stored in adata.var) ---
    gene_statistics: dict[str, float] = Field(default_factory=dict, exclude=True)
    p_values: dict[str, float] = Field(default_factory=dict, exclude=True)
    q_values: dict[str, float] = Field(default_factory=dict, exclude=True)
    spatialde_results: Optional[dict[str, Any]] = Field(default=None, exclude=True)
    sparkx_results: Optional[dict[str, Any]] = Field(default=None, exclude=True)
281
+
282
+
283
class CellCommunicationResult(BaseAnalysisResult):
    """Outcome of a cell-cell communication analysis.

    Serialization:
        Detailed statistics are left out of JSON serialization to keep the
        MCP response small; the key summary information lives in explicit
        fields.

    Included in the MCP response:
        - data_id, method, species, database (basic info)
        - n_lr_pairs, n_significant_pairs, top_lr_pairs (summary)
        - Various *_key fields (storage keys for accessing full results)

    Excluded from the MCP response:
        - statistics (detailed analysis metrics)
    """

    data_id: str
    method: str
    species: str
    database: str

    # Total number of LR pairs tested.
    n_lr_pairs: int
    # Number of significant LR pairs.
    n_significant_pairs: int

    # --- Global analysis results ---
    # adata.uns key holding the global results.
    global_results_key: Optional[str] = None
    # Top significant LR pairs.
    top_lr_pairs: list[str] = Field(default_factory=list)

    # --- Local analysis results (if performed) ---
    local_analysis_performed: bool = False
    # adata.uns key holding the local results.
    local_results_key: Optional[str] = None
    # adata.obsp key holding the communication matrices.
    communication_matrices_key: Optional[str] = None

    # --- LIANA+ specific results ---
    # adata.uns key for the LIANA cluster results.
    liana_results_key: Optional[str] = None
    # adata.uns key for the LIANA spatial results.
    liana_spatial_results_key: Optional[str] = None
    # adata.obsm key for the spatial scores.
    liana_spatial_scores_key: Optional[str] = None
    # Type of LIANA analysis: 'cluster' or 'spatial'.
    analysis_type: Optional[str] = None

    # --- Communication patterns (if identified) ---
    patterns_identified: bool = False
    n_patterns: Optional[int] = None
    # adata.obs key holding the communication patterns.
    patterns_key: Optional[str] = None

    # --- Details: kept on the object, never serialized to the LLM ---
    statistics: dict[str, Any] = Field(default_factory=dict, exclude=True)
347
+
348
+
349
class EnrichmentResult(BaseAnalysisResult):
    """Outcome of a gene set enrichment analysis.

    Serialization:
        Large dictionaries are declared with ``Field(exclude=True)`` so
        they stay out of JSON serialization, shrinking the MCP response
        (~12k tokens -> ~0.5k tokens). They remain on the Python object and
        are saved to adata.uns for downstream visualization.

    Included in the MCP response (sent to LLM):
        - method, n_gene_sets, n_significant (basic info)
        - top_gene_sets, top_depleted_sets (top 10 pathway names)
        - spatial_scores_key (for spatial methods)

    Excluded from the MCP response (stored in adata.uns):
        - enrichment_scores, pvalues, adjusted_pvalues (full dicts)
        - gene_set_statistics (detailed stats per pathway)
        - spatial_metrics (spatial autocorrelation data)
    """

    # --- Basic information: always part of the MCP response ---
    # Method used (pathway_gsea, pathway_ora, etc.).
    method: str
    # Number of gene sets analyzed.
    n_gene_sets: int
    # Number of significant gene sets.
    n_significant: int

    # --- Top results: always included (compact, pathway names only) ---
    # Top enriched gene sets (max 10).
    top_gene_sets: list[str]
    # Top depleted gene sets (max 10).
    top_depleted_sets: list[str]

    # --- Spatial info key: included ---
    # Key in adata.obsm.
    spatial_scores_key: Optional[str] = None

    # --- Excluded from the MCP response; stored in adata.uns for viz. ---
    # The full data is available through the visualize_data() tool.
    enrichment_scores: dict[str, float] = Field(default_factory=dict, exclude=True)
    pvalues: Optional[dict[str, float]] = Field(default=None, exclude=True)
    adjusted_pvalues: Optional[dict[str, float]] = Field(default=None, exclude=True)
    gene_set_statistics: dict[str, dict[str, Any]] = Field(
        default_factory=dict, exclude=True
    )
    spatial_metrics: Optional[dict[str, Any]] = Field(default=None, exclude=True)
405
+
406
+
407
class CNVResult(BaseAnalysisResult):
    """Outcome of a Copy Number Variation (CNV) analysis.

    Serialization:
        In line with the other result models, the ``statistics`` dict is
        left out of JSON serialization; the key summary information lives
        in explicit fields.

    Included in the MCP response:
        - data_id, method, reference_key, reference_categories (basic info)
        - n_chromosomes, n_genes_analyzed (summary counts)
        - cnv_score_key (storage key)
        - visualization_available (status flag)

    Excluded from the MCP response:
        - statistics (detailed CNV metrics)
    """

    data_id: str

    # Method used (e.g., "infercnvpy").
    method: str
    # Column used to pick the reference cells.
    reference_key: str
    # Categories used as reference.
    reference_categories: list[str]
    # Number of chromosomes analyzed.
    n_chromosomes: int
    # Number of genes analyzed.
    n_genes_analyzed: int
    # Key in adata.obsm (e.g., "X_cnv").
    cnv_score_key: Optional[str] = None
    # Whether a visualization is available.
    visualization_available: bool = False

    # --- Details: kept on the object, never serialized to the LLM ---
    statistics: Optional[dict[str, Any]] = Field(default=None, exclude=True)
438
+
439
+
440
class DEGene(BaseAnalysisResult):
    """One differentially expressed gene together with its statistics."""

    # Gene name.
    gene: str

    # Test statistics for the gene.
    # NOTE(review): padj is presumably the multiple-testing adjusted
    # p-value — confirm against the code that fills this model.
    log2fc: float
    pvalue: float
    padj: float

    # Optional mean expression per condition; None when not supplied.
    mean_expr_condition1: Optional[float] = None
    mean_expr_condition2: Optional[float] = None
449
+
450
+
451
class CellTypeComparisonResult(BaseAnalysisResult):
    """Differential expression outcome restricted to one cell type."""

    cell_type: str

    # Cell and sample counts for each of the two conditions.
    n_cells_condition1: int
    n_cells_condition2: int
    n_samples_condition1: int
    n_samples_condition2: int

    n_significant_genes: int

    # Genes upregulated in condition1.
    top_upregulated: list[DEGene]
    # Genes downregulated in condition1.
    top_downregulated: list[DEGene]

    # Full gene list; left out of the MCP response to reduce its size.
    all_de_genes: list[DEGene] = Field(default_factory=list, exclude=True)
466
+
467
+
468
class ConditionComparisonResult(BaseAnalysisResult):
    """Outcome of a multi-sample condition comparison analysis.

    Attributes:
        data_id: Identifier of the dataset.
        method: Method used for differential expression.
        comparison: Human-readable comparison string
            (e.g., "Treatment vs Control").
        condition_key: Column used to group by condition.
        condition1: First condition (experimental group).
        condition2: Second condition (reference group).
        sample_key: Column used to identify samples.
        cell_type_key: Column used for cell type stratification (if provided).
        n_samples_condition1: Number of samples in condition1.
        n_samples_condition2: Number of samples in condition2.
        global_n_significant, global_top_upregulated,
        global_top_downregulated: Results when there is no cell type
            stratification (cell_type_key=None).
        cell_type_results: Results stratified by cell type (when
            cell_type_key is provided).
        results_key: Key in adata.uns under which the full results are stored.
        statistics: Overall statistics about the comparison.
    """

    data_id: str
    method: str
    comparison: str
    condition_key: str
    condition1: str
    condition2: str
    sample_key: str
    cell_type_key: Optional[str] = None

    # --- Sample counts ---
    n_samples_condition1: int
    n_samples_condition2: int

    # --- Global results (when cell_type_key is None) ---
    global_n_significant: Optional[int] = None
    global_top_upregulated: Optional[list[DEGene]] = None
    global_top_downregulated: Optional[list[DEGene]] = None

    # --- Cell type stratified results (when cell_type_key is provided) ---
    cell_type_results: Optional[list[CellTypeComparisonResult]] = None

    # --- Storage keys ---
    # adata.uns key for the full results.
    results_key: str

    # --- Summary statistics ---
    statistics: dict[str, Any]