chatspatial 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chatspatial/__init__.py +11 -0
- chatspatial/__main__.py +141 -0
- chatspatial/cli/__init__.py +7 -0
- chatspatial/config.py +53 -0
- chatspatial/models/__init__.py +85 -0
- chatspatial/models/analysis.py +513 -0
- chatspatial/models/data.py +2462 -0
- chatspatial/server.py +1763 -0
- chatspatial/spatial_mcp_adapter.py +720 -0
- chatspatial/tools/__init__.py +3 -0
- chatspatial/tools/annotation.py +1903 -0
- chatspatial/tools/cell_communication.py +1603 -0
- chatspatial/tools/cnv_analysis.py +605 -0
- chatspatial/tools/condition_comparison.py +595 -0
- chatspatial/tools/deconvolution/__init__.py +402 -0
- chatspatial/tools/deconvolution/base.py +318 -0
- chatspatial/tools/deconvolution/card.py +244 -0
- chatspatial/tools/deconvolution/cell2location.py +326 -0
- chatspatial/tools/deconvolution/destvi.py +144 -0
- chatspatial/tools/deconvolution/flashdeconv.py +101 -0
- chatspatial/tools/deconvolution/rctd.py +317 -0
- chatspatial/tools/deconvolution/spotlight.py +216 -0
- chatspatial/tools/deconvolution/stereoscope.py +109 -0
- chatspatial/tools/deconvolution/tangram.py +135 -0
- chatspatial/tools/differential.py +625 -0
- chatspatial/tools/embeddings.py +298 -0
- chatspatial/tools/enrichment.py +1863 -0
- chatspatial/tools/integration.py +807 -0
- chatspatial/tools/preprocessing.py +723 -0
- chatspatial/tools/spatial_domains.py +808 -0
- chatspatial/tools/spatial_genes.py +836 -0
- chatspatial/tools/spatial_registration.py +441 -0
- chatspatial/tools/spatial_statistics.py +1476 -0
- chatspatial/tools/trajectory.py +495 -0
- chatspatial/tools/velocity.py +405 -0
- chatspatial/tools/visualization/__init__.py +155 -0
- chatspatial/tools/visualization/basic.py +393 -0
- chatspatial/tools/visualization/cell_comm.py +699 -0
- chatspatial/tools/visualization/cnv.py +320 -0
- chatspatial/tools/visualization/core.py +684 -0
- chatspatial/tools/visualization/deconvolution.py +852 -0
- chatspatial/tools/visualization/enrichment.py +660 -0
- chatspatial/tools/visualization/integration.py +205 -0
- chatspatial/tools/visualization/main.py +164 -0
- chatspatial/tools/visualization/multi_gene.py +739 -0
- chatspatial/tools/visualization/persistence.py +335 -0
- chatspatial/tools/visualization/spatial_stats.py +469 -0
- chatspatial/tools/visualization/trajectory.py +639 -0
- chatspatial/tools/visualization/velocity.py +411 -0
- chatspatial/utils/__init__.py +115 -0
- chatspatial/utils/adata_utils.py +1372 -0
- chatspatial/utils/compute.py +327 -0
- chatspatial/utils/data_loader.py +499 -0
- chatspatial/utils/dependency_manager.py +462 -0
- chatspatial/utils/device_utils.py +165 -0
- chatspatial/utils/exceptions.py +185 -0
- chatspatial/utils/image_utils.py +267 -0
- chatspatial/utils/mcp_utils.py +137 -0
- chatspatial/utils/path_utils.py +243 -0
- chatspatial/utils/persistence.py +78 -0
- chatspatial/utils/scipy_compat.py +143 -0
- chatspatial-1.1.0.dist-info/METADATA +242 -0
- chatspatial-1.1.0.dist-info/RECORD +67 -0
- chatspatial-1.1.0.dist-info/WHEEL +5 -0
- chatspatial-1.1.0.dist-info/entry_points.txt +2 -0
- chatspatial-1.1.0.dist-info/licenses/LICENSE +21 -0
- chatspatial-1.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,318 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Base utilities for deconvolution methods.
|
|
3
|
+
|
|
4
|
+
Design Philosophy:
|
|
5
|
+
- Immutable data container (frozen dataclass) for prepared data
|
|
6
|
+
- Single function API for the common case
|
|
7
|
+
- Hook pattern for method-specific preprocessing (e.g., cell2location)
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from collections.abc import Awaitable, Callable
|
|
11
|
+
from dataclasses import dataclass
|
|
12
|
+
from typing import (
|
|
13
|
+
TYPE_CHECKING,
|
|
14
|
+
Any,
|
|
15
|
+
Optional,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
import anndata as ad
|
|
19
|
+
import numpy as np
|
|
20
|
+
import pandas as pd
|
|
21
|
+
from numpy.typing import NDArray
|
|
22
|
+
|
|
23
|
+
if TYPE_CHECKING:
|
|
24
|
+
from ...spatial_mcp_adapter import ToolContext
|
|
25
|
+
|
|
26
|
+
from ...utils.adata_utils import (
|
|
27
|
+
find_common_genes,
|
|
28
|
+
get_raw_data_source,
|
|
29
|
+
get_spatial_key,
|
|
30
|
+
to_dense,
|
|
31
|
+
validate_gene_overlap,
|
|
32
|
+
validate_obs_column,
|
|
33
|
+
)
|
|
34
|
+
from ...utils.exceptions import DataError
|
|
35
|
+
|
|
36
|
+
# Type alias for preprocess hook
|
|
37
|
+
PreprocessHook = Callable[
|
|
38
|
+
[ad.AnnData, ad.AnnData, "ToolContext"],
|
|
39
|
+
Awaitable[tuple[ad.AnnData, ad.AnnData]],
|
|
40
|
+
]
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# =============================================================================
|
|
44
|
+
# Immutable Data Container
|
|
45
|
+
# =============================================================================
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@dataclass(frozen=True)
class PreparedDeconvolutionData:
    """Read-only bundle of everything a deconvolution method needs.

    Instances are built by ``prepare_deconvolution()``. Because the dataclass
    is frozen, attributes cannot be rebound after construction; note that
    freezing does not make the contained objects (AnnData, lists, arrays)
    themselves immutable.

    Attributes:
        spatial: Spatial AnnData restricted to the common genes.
        reference: Reference AnnData restricted to the common genes.
        cell_type_key: Name of the ``reference.obs`` column with cell types.
        cell_types: Unique cell type labels found in the reference.
        common_genes: Genes shared by the spatial and reference datasets.
        spatial_coords: Coordinates read from ``obsm`` of the spatial data,
            or ``None`` when no spatial key was found.
        ctx: ToolContext used for logging/warnings.

    Usage:
        data = await prepare_deconvolution(spatial, ref, "cell_type", ctx)
        proportions = run_method(data.spatial, data.reference, data.cell_types)
    """

    # Expression data, already subset to ``common_genes``.
    spatial: ad.AnnData
    reference: ad.AnnData

    # Cell type annotation of the reference.
    cell_type_key: str
    cell_types: list[str]

    # Gene set shared by both datasets.
    common_genes: list[str]

    # Spot coordinates (None if the spatial data carried none).
    spatial_coords: Optional[NDArray[np.floating]]

    # Context handle for user-facing messages.
    ctx: "ToolContext"

    @property
    def n_spots(self) -> int:
        """Return the number of observations in ``spatial``."""
        return self.spatial.n_obs

    @property
    def n_cell_types(self) -> int:
        """Return how many cell type labels are stored."""
        return len(self.cell_types)

    @property
    def n_genes(self) -> int:
        """Return how many common genes are stored."""
        return len(self.common_genes)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
# =============================================================================
|
|
94
|
+
# Single Entry Point
|
|
95
|
+
# =============================================================================
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
async def prepare_deconvolution(
    spatial_adata: ad.AnnData,
    reference_adata: ad.AnnData,
    cell_type_key: str,
    ctx: "ToolContext",
    require_int_dtype: bool = False,
    min_common_genes: int = 100,
    preprocess: Optional[PreprocessHook] = None,
) -> PreparedDeconvolutionData:
    """Build a ``PreparedDeconvolutionData`` from raw inputs in one call.

    Steps, in order:
        1. Read spatial coordinates from the untouched spatial object.
        2. Validate ``cell_type_key`` and collect the unique cell types.
        3. Restore counts for the spatial data, then for the reference.
        4. Run the optional ``preprocess`` hook on both prepared objects.
        5. Determine the common genes and validate the overlap.
        6. Subset both datasets to the common genes and package the result.

    Args:
        spatial_adata: Spatial transcriptomics AnnData.
        reference_adata: Single-cell reference AnnData.
        cell_type_key: Column in ``reference_adata.obs`` holding cell types.
        ctx: ToolContext used for logging.
        require_int_dtype: Forwarded to ``_prepare_counts``; requests int32
            conversion of the count matrix (used by R-based methods).
        min_common_genes: Minimum number of shared genes required.
        preprocess: Optional async hook with signature
            ``async (spatial, reference, ctx) -> (spatial, reference)``.
            It runs after count restoration and before gene matching.

    Returns:
        PreparedDeconvolutionData with every field populated.

    Raises:
        DataError: If the reference has fewer than two distinct cell types.
            The validation helpers called here may raise as well.

    Examples:
        # Standard usage (most methods)
        data = await prepare_deconvolution(spatial, ref, "cell_type", ctx)

        # With custom preprocessing (e.g., cell2location)
        async def custom_filter(sp, ref, ctx):
            sp = await apply_filtering(sp, ctx)
            ref = await apply_filtering(ref, ctx)
            return sp, ref

        data = await prepare_deconvolution(
            spatial, ref, "cell_type", ctx,
            preprocess=custom_filter
        )
    """
    # Coordinates come from the original object, before any processing.
    # NOTE(review): if a preprocess hook ever removes spots, these rows would
    # presumably no longer line up with the returned ``spatial`` - confirm.
    coords_key = get_spatial_key(spatial_adata)
    coords: Optional[NDArray[np.floating]] = (
        np.asarray(spatial_adata.obsm[coords_key], dtype=np.float64)
        if coords_key
        else None
    )

    # The cell type column must exist and contain at least two labels.
    validate_obs_column(reference_adata, cell_type_key, "Cell type")
    labels = list(reference_adata.obs[cell_type_key].unique())
    if len(labels) < 2:
        raise DataError(
            f"Reference data must have at least 2 cell types, found {len(labels)}"
        )

    # Count restoration: spatial first, then reference.
    prepared_spatial = await _prepare_counts(
        spatial_adata, "Spatial", ctx, require_int_dtype
    )
    prepared_reference = await _prepare_counts(
        reference_adata, "Reference", ctx, require_int_dtype
    )

    # Method-specific hook (optional).
    if preprocess is not None:
        prepared_spatial, prepared_reference = await preprocess(
            prepared_spatial, prepared_reference, ctx
        )

    # Gene matching and overlap validation.
    shared_genes = find_common_genes(
        prepared_spatial.var_names,
        prepared_reference.var_names,
    )
    validate_gene_overlap(
        shared_genes,
        prepared_spatial.n_vars,
        prepared_reference.n_vars,
        min_genes=min_common_genes,
        source_name="spatial",
        target_name="reference",
    )

    # Independent copies restricted to the shared genes.
    spatial_subset = prepared_spatial[:, shared_genes].copy()
    reference_subset = prepared_reference[:, shared_genes].copy()

    return PreparedDeconvolutionData(
        spatial=spatial_subset,
        reference=reference_subset,
        cell_type_key=cell_type_key,
        cell_types=labels,
        common_genes=shared_genes,
        spatial_coords=coords,
        ctx=ctx,
    )
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
async def _prepare_counts(
    adata: ad.AnnData,
    label: str,
    ctx: "ToolContext",
    require_int_dtype: bool,
) -> ad.AnnData:
    """Return a new AnnData whose ``X`` holds the best available counts.

    Args:
        adata: Input AnnData; it is not modified.
        label: Dataset name used as prefix in the warning message.
        ctx: ToolContext whose ``warning`` coroutine is awaited when no
            integer counts were found.
        require_int_dtype: When true and integer counts were found, ``X`` is
            densified and stored as int32.

    Returns:
        A copy built from ``adata.raw``, from the ``"counts"`` layer, or from
        ``adata`` itself, depending on what ``get_raw_data_source`` reports.
    """
    source_info = get_raw_data_source(
        adata,
        prefer_complete_genes=True,
        require_integer_counts=True,
        sample_size=100,
    )
    has_integer_counts = source_info.is_integer_counts

    if not has_integer_counts:
        await ctx.warning(
            f"{label}: Using normalized data (no raw counts available). "
            f"This may be acceptable for some reference datasets."
        )

    if source_info.source == "raw":
        prepared = adata.raw.to_adata()
        # Re-attach obsm from the original object. The original author notes
        # that raw.to_adata() does not carry it over.
        # NOTE(review): confirm for the installed anndata version.
        for obsm_key, obsm_value in adata.obsm.items():
            prepared.obsm[obsm_key] = obsm_value.copy()
    else:
        prepared = adata.copy()
        if source_info.source == "counts_layer":
            prepared.X = prepared.layers["counts"]

    # int32 dense matrix is only produced for genuine integer counts.
    if require_int_dtype and has_integer_counts:
        matrix = to_dense(prepared.X)
        if matrix.dtype != np.int32:
            matrix = matrix.astype(np.int32, copy=False)
        prepared.X = matrix

    return prepared
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
# =============================================================================
|
|
246
|
+
# Statistics Helper
|
|
247
|
+
# =============================================================================
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def create_deconvolution_stats(
    proportions: pd.DataFrame,
    common_genes: list[str],
    method: str,
    device: str = "CPU",
    **method_specific_params,
) -> dict[str, Any]:
    """Summarize deconvolution output in a standard dictionary.

    Args:
        proportions: Table with one row per spot and one column per cell type.
        common_genes: Genes used for the deconvolution; only its length is
            reported.
        method: Name of the deconvolution method.
        device: Device label to report (defaults to ``"CPU"``).
        **method_specific_params: Extra entries merged into the result last,
            so they override standard keys of the same name.

    Returns:
        Dictionary with method/device, spot and cell type counts, the cell
        type labels, gene counts, per-column mean proportions and, for each
        cell type, the number of rows in which it has the largest value.
    """
    type_labels = [*proportions.columns]
    gene_count = len(common_genes)

    # Column-wise means and the count of rows dominated by each column.
    column_means = proportions.mean().to_dict()
    top_type_per_row = proportions.idxmax(axis=1)
    dominance_counts = top_type_per_row.value_counts().to_dict()

    summary: dict[str, Any] = {
        "method": method,
        "device": device,
        "n_spots": len(proportions),
        "n_cell_types": len(type_labels),
        "cell_types": type_labels,
        "genes_used": gene_count,
        "common_genes": gene_count,
        "mean_proportions": column_means,
        "dominant_types": dominance_counts,
    }
    return {**summary, **method_specific_params}
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
# =============================================================================
|
|
275
|
+
# Convergence Checking
|
|
276
|
+
# =============================================================================
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
def check_model_convergence(
    model,
    model_name: str,
    convergence_threshold: float = 0.001,
    convergence_window: int = 50,
) -> tuple[bool, Optional[str]]:
    """Check whether a trained model's loss history has flattened out.

    The first non-empty entry among ``"elbo_train"``, ``"elbo_validation"``
    and ``"train_loss_epoch"`` in ``model.history`` is used. Over the last
    ``convergence_window`` values, the mean absolute step-to-step change is
    divided by the absolute mean value and compared with
    ``convergence_threshold``.

    Args:
        model: Object that may expose a ``history`` mapping (e.g. a
            scvi-tools model).
        model_name: Name used in the warning message.
        convergence_threshold: Largest mean relative change still regarded
            as converged.
        convergence_window: Number of trailing history values to inspect.

    Returns:
        ``(True, None)`` when the model looks converged or when there is not
        enough history to judge; ``(False, message)`` when the relative
        change exceeds the threshold or the inspected values contain
        NaN/inf.
    """
    history = getattr(model, "history", None)
    if history is None:
        return True, None

    elbo_history = None
    for key in ("elbo_train", "elbo_validation", "train_loss_epoch"):
        if key in history and len(history[key]) > 0:
            elbo_history = history[key]
            break

    # Too little history to judge: treat as converged (no warning).
    if elbo_history is None or len(elbo_history) < convergence_window:
        return True, None

    elbo_arr = np.asarray(elbo_history, dtype=float).ravel()
    recent_elbo = elbo_arr[-convergence_window:]

    # NaN/inf would make every comparison below evaluate to False and the
    # model would be silently reported as converged, so flag it explicitly.
    if not np.isfinite(recent_elbo).all():
        return False, (
            f"{model_name} may not have converged: the last "
            f"{len(recent_elbo)} training history values contain NaN or "
            "infinite entries. Training may be numerically unstable."
        )

    elbo_changes = np.abs(np.diff(recent_elbo))
    if elbo_changes.size == 0:
        # A window shorter than two values has no step-to-step change;
        # return early rather than averaging an empty array.
        return True, None

    mean_value = np.abs(np.mean(recent_elbo))
    if mean_value > 0:
        mean_relative_change = np.mean(elbo_changes / mean_value)

        if mean_relative_change > convergence_threshold:
            return False, (
                f"{model_name} may not have fully converged. "
                f"Mean relative ELBO change: {mean_relative_change:.4f} "
                f"(threshold: {convergence_threshold}). "
                "Consider increasing training epochs."
            )

    return True, None
|
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CARD (Conditional AutoRegressive-based Deconvolution) method.
|
|
3
|
+
|
|
4
|
+
CARD models spatial correlation in cell type composition using a
|
|
5
|
+
CAR (Conditional AutoRegressive) model. Unique features:
|
|
6
|
+
- Spatial correlation modeling
|
|
7
|
+
- Optional high-resolution imputation
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from typing import TYPE_CHECKING, Any, Optional
|
|
11
|
+
|
|
12
|
+
import numpy as np
|
|
13
|
+
import pandas as pd
|
|
14
|
+
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
pass
|
|
17
|
+
|
|
18
|
+
from ...utils.dependency_manager import validate_r_package
|
|
19
|
+
from ...utils.exceptions import ProcessingError
|
|
20
|
+
from .base import PreparedDeconvolutionData, create_deconvolution_stats
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def deconvolve(
    data: PreparedDeconvolutionData,
    sample_key: Optional[str] = None,
    minCountGene: int = 100,
    minCountSpot: int = 5,
    imputation: bool = False,
    NumGrids: int = 2000,
    ineibor: int = 10,
) -> tuple[pd.DataFrame, dict[str, Any]]:
    """Deconvolve spatial data using the CARD R package (via rpy2).

    The count matrices and metadata are copied into R's global environment,
    ``createCARDObject`` and ``CARD_deconvolution`` are run there, and the
    resulting proportion matrix is read back. All R globals created here are
    removed again when the function exits, whether it succeeds or fails.

    Args:
        data: Prepared deconvolution data (includes spatial coordinates).
        sample_key: Optional sample/batch column in the reference ``obs``.
            If it is missing or not found, every cell is labelled
            ``"sample1"``.
        minCountGene: Forwarded to ``createCARDObject(minCountGene=...)``.
            NOTE(review): per the CARD tutorial this is the minimum total
            count a spatial location needs to be kept - confirm against the
            installed CARD version.
        minCountSpot: Forwarded to ``createCARDObject(minCountSpot=...)``.
            NOTE(review): per the CARD tutorial this is the minimum number
            of locations with non-zero expression a gene needs to be kept -
            confirm against the installed CARD version.
        imputation: Whether to additionally run ``CARD.imputation``.
        NumGrids: Number of grids for imputation (coerced with ``int()``).
        ineibor: Number of neighbors for imputation (coerced with ``int()``).

    Returns:
        Tuple of (proportions DataFrame, statistics dictionary). When
        imputation is enabled, ``stats["imputation"]`` holds the refined
        proportions and the coordinates parsed from their row names.

    Raises:
        ProcessingError: If any step after package validation fails.
        ImportError: If rpy2/anndata2ri cannot be imported (raised before
            the guarded section).
    """
    import anndata2ri
    import rpy2.robjects as ro
    from rpy2.robjects import numpy2ri, pandas2ri
    from rpy2.robjects.conversion import localconverter

    ctx = data.ctx

    # Validate R package
    validate_r_package(
        "CARD",
        ctx,
        install_cmd="devtools::install_github('YingMa0107/CARD')",
    )

    # Every name this function may create in R's global environment.
    r_globals = (
        "sc_count",
        "spatial_count",
        "gene_names_ref",
        "cell_names",
        "gene_names_spatial",
        "spot_names",
        "sc_meta",
        "spatial_location",
        "minCountGene",
        "minCountSpot",
        "CARD_obj",
        "CARD_impute",
    )
    quoted_globals = ", ".join(f'"{name}"' for name in r_globals)
    # intersect() with ls() limits rm() to names that actually exist, so the
    # cleanup is safe no matter how far the pipeline got before exiting.
    cleanup_script = f"""
        rm(list = intersect(c({quoted_globals}), ls(envir = .GlobalEnv)),
           envir = .GlobalEnv)
        gc()
    """

    try:
        try:
            # Load CARD
            with localconverter(ro.default_converter + pandas2ri.converter):
                ro.r("library(CARD)")

            # Data already copied in prepare_deconvolution
            spatial_data = data.spatial
            reference_data = data.reference

            # Spot coordinates: first two columns of the prepared coordinates,
            # or a placeholder line (x = index, y = 0) when none are available.
            if data.spatial_coords is not None:
                spatial_location = pd.DataFrame(
                    data.spatial_coords[:, :2],
                    index=spatial_data.obs_names,
                    columns=["x", "y"],
                )
            else:
                spatial_location = pd.DataFrame(
                    {"x": range(spatial_data.n_obs), "y": [0] * spatial_data.n_obs},
                    index=spatial_data.obs_names,
                )

            # Reference metadata with the column names used in the R call.
            sc_meta = reference_data.obs[[data.cell_type_key]].copy()
            sc_meta.columns = ["cellType"]

            if sample_key and sample_key in reference_data.obs:
                sc_meta["sampleInfo"] = reference_data.obs[sample_key]
            else:
                sc_meta["sampleInfo"] = "sample1"

            # Transfer matrices to R (transposed: genes as rows).
            with localconverter(ro.default_converter + anndata2ri.converter):
                ro.globalenv["sc_count"] = reference_data.X.T
                ro.globalenv["spatial_count"] = spatial_data.X.T

            ro.globalenv["gene_names_ref"] = ro.StrVector(reference_data.var_names)
            ro.globalenv["cell_names"] = ro.StrVector(reference_data.obs_names)
            ro.globalenv["gene_names_spatial"] = ro.StrVector(spatial_data.var_names)
            ro.globalenv["spot_names"] = ro.StrVector(spatial_data.obs_names)

            ro.r(
                """
                rownames(sc_count) <- gene_names_ref
                colnames(sc_count) <- cell_names
                rownames(spatial_count) <- gene_names_spatial
                colnames(spatial_count) <- spot_names
                """
            )

            # Transfer metadata
            with localconverter(ro.default_converter + pandas2ri.converter):
                ro.globalenv["sc_meta"] = ro.conversion.py2rpy(sc_meta)
                ro.globalenv["spatial_location"] = ro.conversion.py2rpy(
                    spatial_location
                )
                ro.globalenv["minCountGene"] = minCountGene
                ro.globalenv["minCountSpot"] = minCountSpot

            # Create CARD object and run deconvolution. capture.output()
            # redirects the printed R output to /dev/null.
            ro.r(
                """
                capture.output(
                    CARD_obj <- createCARDObject(
                        sc_count = sc_count,
                        sc_meta = sc_meta,
                        spatial_count = spatial_count,
                        spatial_location = spatial_location,
                        ct.varname = "cellType",
                        ct.select = unique(sc_meta$cellType),
                        sample.varname = "sampleInfo",
                        minCountGene = minCountGene,
                        minCountSpot = minCountSpot
                    ),
                    file = "/dev/null"
                )
                capture.output(
                    CARD_obj <- CARD_deconvolution(CARD_object = CARD_obj),
                    file = "/dev/null"
                )
                """
            )

            # Extract results
            with localconverter(
                ro.default_converter + pandas2ri.converter + numpy2ri.converter
            ):
                row_names = list(ro.r("rownames(CARD_obj@Proportion_CARD)"))
                col_names = list(ro.r("colnames(CARD_obj@Proportion_CARD)"))
                proportions_r = ro.r("CARD_obj@Proportion_CARD")
                proportions_array = np.array(proportions_r)

            proportions = pd.DataFrame(
                proportions_array, index=row_names, columns=col_names
            )

            # Optional imputation
            imputed_proportions = None
            imputed_coordinates = None

            if imputation:
                # int() guarantees that only an integer literal is spliced
                # into the R source, whatever the caller passed.
                num_grids = int(NumGrids)
                n_neighbors = int(ineibor)
                ro.r(
                    f"""
                    capture.output(
                        CARD_impute <- CARD.imputation(
                            CARD_object = CARD_obj,
                            NumGrids = {num_grids},
                            ineibor = {n_neighbors}
                        ),
                        file = "/dev/null"
                    )
                    """
                )

                with localconverter(ro.default_converter + pandas2ri.converter):
                    imputed_row_names = list(
                        ro.r("rownames(CARD_impute@refined_prop)")
                    )
                    imputed_col_names = list(
                        ro.r("colnames(CARD_impute@refined_prop)")
                    )
                    imputed_proportions_r = ro.r("CARD_impute@refined_prop")
                    imputed_proportions_array = np.array(imputed_proportions_r)

                # Row names are parsed as "<x>x<y>" into float coordinates.
                coords_list = []
                for name in imputed_row_names:
                    parts = name.split("x")
                    coords_list.append([float(parts[0]), float(parts[1])])

                imputed_proportions = pd.DataFrame(
                    imputed_proportions_array,
                    index=imputed_row_names,
                    columns=imputed_col_names,
                )
                imputed_coordinates = pd.DataFrame(
                    coords_list, index=imputed_row_names, columns=["x", "y"]
                )

            # Create statistics
            stats = create_deconvolution_stats(
                proportions,
                data.common_genes,
                method="CARD",
                device="CPU",
                minCountGene=minCountGene,
                minCountSpot=minCountSpot,
            )

            if imputation and imputed_proportions is not None:
                stats["imputation"] = {
                    "enabled": True,
                    "n_imputed_locations": len(imputed_proportions),
                    "resolution_increase": (
                        f"{len(imputed_proportions) / len(row_names):.1f}x"
                    ),
                    "imputed_proportions": imputed_proportions,
                    "imputed_coordinates": imputed_coordinates,
                }

            return proportions, stats

        finally:
            # Runs on success and on failure, so a failed run does not leave
            # its matrices and CARD objects behind in the R session.
            ro.r(cleanup_script)

    except ProcessingError:
        raise
    except Exception as e:
        raise ProcessingError(f"CARD deconvolution failed: {e}") from e
|