sc-mantis 0.51__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sc_mantis-0.51/LICENSE +21 -0
- sc_mantis-0.51/PKG-INFO +12 -0
- sc_mantis-0.51/README.md +1 -0
- sc_mantis-0.51/mantis/__init__.py +5 -0
- sc_mantis-0.51/mantis/io/__init__.py +1 -0
- sc_mantis-0.51/mantis/io/_io.py +125 -0
- sc_mantis-0.51/mantis/tools/__init__.py +6 -0
- sc_mantis-0.51/mantis/tools/_celltype.py +129 -0
- sc_mantis-0.51/mantis/tools/_genemet.py +226 -0
- sc_mantis-0.51/mantis/tools/_region.py +231 -0
- sc_mantis-0.51/mantis/tools/_sampler.py +549 -0
- sc_mantis-0.51/mantis/tools/_space.py +177 -0
- sc_mantis-0.51/mantis/tools/_spc.py +791 -0
- sc_mantis-0.51/pyproject.toml +26 -0
- sc_mantis-0.51/sc_mantis.egg-info/PKG-INFO +12 -0
- sc_mantis-0.51/sc_mantis.egg-info/SOURCES.txt +17 -0
- sc_mantis-0.51/sc_mantis.egg-info/dependency_links.txt +1 -0
- sc_mantis-0.51/sc_mantis.egg-info/top_level.txt +2 -0
- sc_mantis-0.51/setup.cfg +4 -0
sc_mantis-0.51/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 yuhaotuo
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
sc_mantis-0.51/PKG-INFO
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: sc-mantis
|
|
3
|
+
Version: 0.51
|
|
4
|
+
Summary: SCOUBI
|
|
5
|
+
Author-email: Yu Hao <yhao306@gatech.edu>, Bhavay Aggarwal <baggarwal9@gatech.edu>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Requires-Python: >=3.12
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Dynamic: license-file
|
|
11
|
+
|
|
12
|
+
# MANTIS
|
sc_mantis-0.51/README.md
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# MANTIS
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from ._io import load_data
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
import numpy as np
|
|
3
|
+
import anndata as ad
|
|
4
|
+
from mudata import MuData
|
|
5
|
+
from mudata import set_options
|
|
6
|
+
import mudata as mu
|
|
7
|
+
import os
|
|
8
|
+
import types
|
|
9
|
+
|
|
10
|
+
def _summarize(self):
|
|
11
|
+
"""Structured summary of MuData with grouped .uns fields."""
|
|
12
|
+
print(f"MuData object with n_obs × n_vars = {self.n_obs} × {self.n_vars}")
|
|
13
|
+
print("obs:", ", ".join(self.obs_keys()))
|
|
14
|
+
print("obsm:", ", ".join(self.obsm_keys()))
|
|
15
|
+
print("uns:")
|
|
16
|
+
|
|
17
|
+
# --- Predefined grouping map ---
|
|
18
|
+
group_map = {
|
|
19
|
+
'cell_type': ['celltype_metabolite', 'celltype_metabolite_per_ct'],
|
|
20
|
+
'region': ['regional_metabolite', 'regional_metabolite_fit'],
|
|
21
|
+
'spatvar': ['spatvar_metabolite', 'spatvar_metabolite_celltype',
|
|
22
|
+
'spatvar_metabolite_celltype_coef', 'spatvar_metabolite_celltype_summary',
|
|
23
|
+
'spatvar_metabolite_combined', 'spatvar_metabolite_region'],
|
|
24
|
+
'genemet': ['genemet_sci', 'genemet_sci_summary'],
|
|
25
|
+
'misc': [],
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
# flatten group map for lookup
|
|
29
|
+
assigned = set(k for keys in group_map.values() for k in keys)
|
|
30
|
+
uns_keys = set(self.uns.keys())
|
|
31
|
+
ungrouped = sorted(uns_keys - assigned)
|
|
32
|
+
|
|
33
|
+
# print present keys per group
|
|
34
|
+
for group, keys in group_map.items():
|
|
35
|
+
present = [k for k in keys if k in self.uns]
|
|
36
|
+
if present:
|
|
37
|
+
print(f" {group:<22}: {', '.join(present)}")
|
|
38
|
+
|
|
39
|
+
if ungrouped:
|
|
40
|
+
print(f" {'other':<22}: {', '.join(ungrouped)}")
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _attach_summarize(mdata):
    """Bind ``_summarize`` to one in-memory object as its ``summarize`` method.

    The bound method is stored on the instance itself, so calling
    ``mdata.summarize()`` runs ``_summarize(mdata)``. The same object is
    returned so the call can be chained.
    """
    bound = types.MethodType(_summarize, mdata)
    setattr(mdata, "summarize", bound)
    return mdata
|
|
47
|
+
|
|
48
|
+
def load_data(
    g_file=None,     # CSV of gene expression (cells × genes)
    m_file=None,     # CSV of metabolite abundances (cells × metabolites)
    coords=None,     # CSV of spatial coordinates
    cell_type=None,  # CSV of cell type labels
    region=None,     # CSV of region labels
    mdata=None       # existing .h5mu file path
):
    """
    Load or build a MuData object containing multi-omic and spatial data.

    If `mdata` is provided, loads it directly and ignores all other inputs.
    Otherwise, builds a MuData object from the provided CSV files.

    Every CSV is read with its first column as the index. The cells kept are
    the intersection of the index of `coords` with the indices of `g_file`
    and `m_file` (whichever are given). Modalities are stored under the keys
    ``"gene"`` and ``"metabolite"``; `cell_type` / `region` tables and the
    coordinates are stored in ``.obsm`` under ``"cell_type"``, ``"region"``
    and ``"spatial"``.

    Side effects: calls ``set_options(pull_on_update=False)`` on every call,
    and on the build-from-CSV path also assigns ``_summarize`` to
    ``mu.MuData.summarize`` at class level.

    Returns
    -------
    The MuData object, with a ``summarize()`` method attached.

    Raises
    ------
    FileNotFoundError
        If `mdata` is given but the path does not exist.
    ValueError
        If `coords` is missing, if the inputs share no cell index, or if
        neither `g_file` nor `m_file` is given.
    """
    set_options(pull_on_update=False)

    # Fast path: an existing .h5mu file wins over every other argument.
    if mdata is not None:
        if not os.path.exists(mdata):
            raise FileNotFoundError(f"File not found: {mdata}")
        mdata_obj = mu.read_h5mu(mdata)
        mdata_obj = _attach_summarize(mdata_obj)
        return mdata_obj

    if coords is None:
        raise ValueError("Spatial coordinate file (coords) must be provided.")
    coords_df = pd.read_csv(coords, index_col=0)

    gene_df = pd.read_csv(g_file, index_col=0) if g_file else None
    met_df = pd.read_csv(m_file, index_col=0) if m_file else None

    # Keep only the cells present in every table that was supplied.
    common_idx = coords_df.index
    if gene_df is not None:
        common_idx = common_idx.intersection(gene_df.index)
    if met_df is not None:
        common_idx = common_idx.intersection(met_df.index)

    if len(common_idx) == 0:
        raise ValueError("No common cell indices found among provided datasets.")

    coords_df = coords_df.loc[common_idx]
    if gene_df is not None:
        gene_df = gene_df.loc[common_idx]
    if met_df is not None:
        met_df = met_df.loc[common_idx]

    # One AnnData per modality, all sharing the same observation index.
    adatas = {}
    if gene_df is not None:
        adatas["gene"] = ad.AnnData(
            X=gene_df.to_numpy(dtype=float),
            obs=pd.DataFrame(index=common_idx),
            var=pd.DataFrame(index=gene_df.columns)
        )
    if met_df is not None:
        adatas["metabolite"] = ad.AnnData(
            X=met_df.to_numpy(dtype=float),
            obs=pd.DataFrame(index=common_idx),
            var=pd.DataFrame(index=met_df.columns)
        )

    if not adatas:
        raise ValueError("No omic data provided. Must provide at least one of g_file or m_file.")

    mdata_obj = MuData(adatas)

    # Per-cell annotations; .loc raises KeyError if a common cell is absent
    # from the annotation table.
    obs_meta = pd.DataFrame(index=common_idx)
    if cell_type is not None:
        celltype_df = pd.read_csv(cell_type, index_col=0)
        obs_meta["cell_type"] = celltype_df.loc[common_idx]
    if region is not None:
        region_df = pd.read_csv(region, index_col=0)
        obs_meta["region"] = region_df.loc[common_idx]
    mdata_obj.obsm = obs_meta

    mdata_obj.obsm["spatial"] = coords_df
    mdata_obj = _attach_summarize(mdata_obj)
    # NOTE(review): this class-level patch only happens on the CSV path; the
    # .h5mu path attaches summarize() to the instance only. Kept as-is so
    # existing callers see the same behaviour — confirm which one is intended.
    mu.MuData.summarize = _summarize
    return mdata_obj
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
import numpy as np
|
|
3
|
+
import matplotlib.pyplot as plt
|
|
4
|
+
import numpy as np
|
|
5
|
+
import pandas as pd
|
|
6
|
+
from scipy.spatial.distance import pdist, squareform
|
|
7
|
+
|
|
8
|
+
def _bh_fdr_rowwise(p_mat: np.ndarray):
|
|
9
|
+
p = np.asarray(p_mat, dtype=float)
|
|
10
|
+
K, M = p.shape
|
|
11
|
+
q = np.empty_like(p)
|
|
12
|
+
for i in range(K):
|
|
13
|
+
pi = p[i].copy()
|
|
14
|
+
order = np.argsort(pi, kind="mergesort")
|
|
15
|
+
ranks = np.arange(1, M + 1, dtype=float)
|
|
16
|
+
qi = pi[order] * M / ranks
|
|
17
|
+
qi = np.minimum.accumulate(qi[::-1])[::-1]
|
|
18
|
+
out = np.empty_like(qi)
|
|
19
|
+
out[order] = qi
|
|
20
|
+
q[i] = np.clip(out, 0, 1)
|
|
21
|
+
return q
|
|
22
|
+
|
|
23
|
+
def _significant_by_ct_single_null(
    SCI_real: pd.DataFrame,
    SCI_null: pd.DataFrame,
    alpha: float = 0.05,
    two_sided: bool = True,
):
    """Score each (row, column) SCI value against that row's null values.

    Both frames are restricted to their shared index and columns. For each
    row, the empirical p-value of an entry is
    ``(#{null values in the row >= entry} + 1) / (M + 1)`` with ``M`` the
    number of shared columns; absolute values are compared when
    ``two_sided`` is true. P-values are then adjusted row by row with
    ``_bh_fdr_rowwise``.

    Returns
    -------
    long_df : pandas.DataFrame
        One record per (row, column) pair with columns ``cell_type``,
        ``metabolite``, ``sci``, ``p_emp``, ``q_bh`` and ``significant``
        (``q_bh < alpha``). Empty, but with these columns, when the frames
        share no rows or no columns.
    per_ct : dict
        Maps each ``cell_type`` value to its significant records, sorted by
        ``q_bh``, ``p_emp``, ``sci``.
    """
    rows = SCI_real.index.intersection(SCI_null.index)
    cols = SCI_real.columns.intersection(SCI_null.columns)
    real_vals = SCI_real.loc[rows, cols].astype(float).to_numpy()  # K×M
    null_vals = SCI_null.loc[rows, cols].astype(float).to_numpy()  # K×M
    n_rows, n_cols = real_vals.shape

    real_use = np.abs(real_vals) if two_sided else real_vals
    null_use = np.abs(null_vals) if two_sided else null_vals

    # One M×M comparison per row replaces the element-by-element Python loop.
    p_emp = np.empty_like(real_vals, dtype=float)
    for i in range(n_rows):
        exceed = (null_use[i][None, :] >= real_use[i][:, None]).sum(axis=1)
        p_emp[i] = (exceed + 1.0) / (n_cols + 1.0)

    q_bh = _bh_fdr_rowwise(p_emp)

    records = [
        {
            "cell_type": ct,
            "metabolite": met,
            "sci": float(real_vals[i, j]),
            "p_emp": float(p_emp[i, j]),
            "q_bh": float(q_bh[i, j]),
        }
        for i, ct in enumerate(rows)
        for j, met in enumerate(cols)
    ]
    # Explicit columns keep the frame well-formed when there are no records.
    long_df = pd.DataFrame(
        records, columns=["cell_type", "metabolite", "sci", "p_emp", "q_bh"]
    )
    long_df["significant"] = long_df["q_bh"].astype(float) < alpha

    per_ct = {
        ct: sub[sub["significant"]].sort_values(["q_bh", "p_emp", "sci"])
        for ct, sub in long_df.groupby("cell_type")
    }
    return long_df, per_ct
|
|
76
|
+
|
|
77
|
+
def _zscore_cols(A):
|
|
78
|
+
mu = A.mean(axis=0, keepdims=True)
|
|
79
|
+
sd = A.std(axis=0, ddof=0, keepdims=True)
|
|
80
|
+
sd = np.where(sd == 0, 1.0, sd)
|
|
81
|
+
return (A - mu) / sd
|
|
82
|
+
|
|
83
|
+
def _build_W(xy, l="dmin", norm="row"):
|
|
84
|
+
D = squareform(pdist(xy, metric="euclidean"))
|
|
85
|
+
d_nonzero = D[D > 0]
|
|
86
|
+
if isinstance(l, str) and l == "dmin":
|
|
87
|
+
if d_nonzero.size == 0:
|
|
88
|
+
raise ValueError("there is no dmin.")
|
|
89
|
+
lval = float(np.min(d_nonzero))
|
|
90
|
+
else:
|
|
91
|
+
lval = float(l)
|
|
92
|
+
W = np.exp(-(D**2) / (2.0 * lval * lval))
|
|
93
|
+
np.fill_diagonal(W, 1.0)
|
|
94
|
+
if norm == "row":
|
|
95
|
+
W = W / W.sum(axis=1, keepdims=True)
|
|
96
|
+
elif norm == "global":
|
|
97
|
+
W = W / W.sum()
|
|
98
|
+
return W
|
|
99
|
+
|
|
100
|
+
def _compute_sci(Ydf, Cdf, xy, normalized=True, l="dmin", w_norm="row"):
    """Compute a spatial cross-correlation index between columns of two tables.

    Both tables are column-standardised with ``_zscore_cols`` and combined
    as ``Cz.T @ (W @ Yz)``, where ``W`` comes from ``_build_W(xy, l, w_norm)``.
    With ``normalized=True`` each entry is additionally divided by
    ``sqrt(sum(Cz_k**2) * sum(Yz_m**2))`` (a zero denominator is replaced
    by 1).

    Returns a DataFrame indexed by ``Cdf.columns`` with columns
    ``Ydf.columns``.
    """
    weights = _build_W(xy, l=l, norm=w_norm)
    c_std = _zscore_cols(Cdf.to_numpy())
    y_std = _zscore_cols(Ydf.to_numpy())

    # Spatially smooth Y first, then correlate with C.
    sci = c_std.T @ (weights @ y_std)

    if normalized:
        c_ss = np.sum(c_std**2, axis=0)  # K
        y_ss = np.sum(y_std**2, axis=0)  # M
        denom = np.sqrt(c_ss[:, None] * y_ss[None, :])
        denom[denom == 0] = 1.0
        sci = sci / denom

    return pd.DataFrame(sci, index=Cdf.columns, columns=Ydf.columns)
|
|
118
|
+
|
|
119
|
+
def celltype_met(mdata, alpha = 0.05, two_sided = True):
    """Test cell-type / metabolite spatial association against a null.

    Computes the normalised SCI between ``mdata.obsm['cell_type']`` and the
    ``'metabolite'`` modality, and again with ``mdata.uns['metabolite_null']``
    in place of the metabolite data, then scores the observed values against
    the null with ``_significant_by_ct_single_null``.

    Results are written to ``mdata.uns['celltype_met']`` (long table) and
    ``mdata.uns['celltype_met_per_ct']`` (dict of significant rows per cell
    type). The same ``mdata`` object is returned.
    """
    cell_types = mdata.obsm['cell_type']
    xy = mdata.obsm['spatial'].values

    observed = _compute_sci(
        mdata.mod['metabolite'].to_df(), cell_types, xy, normalized=True
    )
    background = _compute_sci(
        mdata.uns['metabolite_null'].to_df(), cell_types, xy, normalized=True
    )

    table, significant_by_ct = _significant_by_ct_single_null(
        observed,
        background,
        alpha=alpha,
        two_sided=two_sided,
    )

    mdata.uns['celltype_met'] = table
    mdata.uns['celltype_met_per_ct'] = significant_by_ct
    return mdata
|
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from scipy.spatial.distance import pdist, squareform
|
|
5
|
+
import math
|
|
6
|
+
from scipy.spatial import cKDTree
|
|
7
|
+
import matplotlib.pyplot as plt
|
|
8
|
+
import networkx as nx
|
|
9
|
+
import re
|
|
10
|
+
|
|
11
|
+
def _build_weighted_radius_graph(coords, radius=None, l=None, weight_key="w"):
|
|
12
|
+
|
|
13
|
+
if (radius is None) and (l is None):
|
|
14
|
+
raise ValueError("radius or l should be supplied")
|
|
15
|
+
if l is None:
|
|
16
|
+
l = float(radius)
|
|
17
|
+
if radius is None:
|
|
18
|
+
radius = 2.0 * float(l)
|
|
19
|
+
|
|
20
|
+
tree = cKDTree(coords)
|
|
21
|
+
coo = tree.sparse_distance_matrix(tree, max_distance=radius, output_type="coo_matrix")
|
|
22
|
+
G = nx.Graph()
|
|
23
|
+
G.add_nodes_from(range(coords.shape[0]))
|
|
24
|
+
inv_two_l2 = 1.0 / (2.0 * float(l) * float(l))
|
|
25
|
+
|
|
26
|
+
for i, j, d in zip(coo.row, coo.col, coo.data):
|
|
27
|
+
if i < j:
|
|
28
|
+
if d <= l:
|
|
29
|
+
w = 1.0
|
|
30
|
+
else:
|
|
31
|
+
w = float(np.exp(-(d * d) * inv_two_l2))
|
|
32
|
+
if w > 0.0:
|
|
33
|
+
G.add_edge(i, j, **{weight_key: w})
|
|
34
|
+
return G
|
|
35
|
+
|
|
36
|
+
def _target_diag_M_from_matrix(G, X_mat, weight_key="w"):
|
|
37
|
+
M = X_mat.shape[1]
|
|
38
|
+
Mt = np.zeros(M, dtype=float)
|
|
39
|
+
for u, v, data in G.edges(data=True):
|
|
40
|
+
w = data.get(weight_key, 1.0)
|
|
41
|
+
Mt += w * X_mat[u] * X_mat[v]
|
|
42
|
+
return Mt
|
|
43
|
+
|
|
44
|
+
def _build_W_from_coords(coords_xy, l):
|
|
45
|
+
D_condensed = pdist(coords_xy, metric="euclidean")
|
|
46
|
+
dist = squareform(D_condensed)
|
|
47
|
+
W = np.zeros_like(dist, dtype=float)
|
|
48
|
+
|
|
49
|
+
mask = dist > 0
|
|
50
|
+
W[mask] = np.exp(-(dist[mask]**2) / (2.0 * l**2))
|
|
51
|
+
np.fill_diagonal(W, 1.0)
|
|
52
|
+
|
|
53
|
+
row_sums = W.sum(axis=1, keepdims=True)
|
|
54
|
+
row_sums[row_sums == 0.0] = 1.0
|
|
55
|
+
W /= row_sums
|
|
56
|
+
return W
|
|
57
|
+
|
|
58
|
+
def _zscore_cols(A: np.ndarray) -> np.ndarray:
|
|
59
|
+
mu = A.mean(axis=0, keepdims=True)
|
|
60
|
+
sd = A.std(axis=0, ddof=0, keepdims=True)
|
|
61
|
+
sd[sd == 0.0] = 1.0
|
|
62
|
+
return (A - mu) / sd
|
|
63
|
+
|
|
64
|
+
def bh_fdr(p_values_1d):
    """Benjamini-Hochberg q-values for a 1-D collection of p-values.

    P-values are sorted, scaled by ``n / rank``, made monotone from the
    largest rank downwards, and capped at 1. The result is returned in the
    input order as a float array.

    NaN entries are left as NaN and are not counted among the ``n`` tests;
    this keeps a stray NaN from disabling the cap at 1 for the remaining
    values. For NaN-free input the result is the standard BH adjustment.
    """
    p = np.asarray(p_values_1d, dtype=float)
    out = np.full(p.shape, np.nan, dtype=float)

    valid = ~np.isnan(p)
    n = int(valid.sum())
    if n == 0:
        return out

    finite_p = p[valid]
    order = np.argsort(finite_p)
    scaled = finite_p[order] * n / np.arange(1, n + 1, dtype=float)

    # Running minimum from the largest rank down enforces monotone q-values.
    monotone = np.minimum.accumulate(scaled[::-1])[::-1]

    q_valid = np.empty(n, dtype=float)
    q_valid[order] = np.minimum(monotone, 1.0)
    out[valid] = q_valid
    return out
|
|
82
|
+
|
|
83
|
+
def pooled_empirical_p_two(real_mat, null_mat):
    """Two-sided empirical p-values against one pooled null distribution.

    All finite entries of ``null_mat`` are pooled into a single sample of
    size ``N``. For each entry ``x`` of ``real_mat`` the one-sided p-values
    are ``(#{null <= x} + 1) / (N + 1)`` and ``(#{null >= x} + 1) / (N + 1)``;
    the two-sided value is twice the smaller one, capped at 1.

    Returns an array with the shape of ``real_mat``.
    """
    observed = real_mat.ravel()
    pooled = null_mat.ravel()
    pooled = np.sort(pooled[np.isfinite(pooled)])
    n_null = pooled.size

    # Counts come from binary searches in the sorted null sample.
    n_le = np.searchsorted(pooled, observed, side='right')
    n_ge = n_null - np.searchsorted(pooled, observed, side='left')

    smaller_tail = np.minimum(n_le + 1, n_ge + 1) / (n_null + 1)
    two_sided = np.minimum(2 * smaller_tail, 1.0)
    return two_sided.reshape(real_mat.shape)
|
|
101
|
+
|
|
102
|
+
def compute_genemet_sci(mdata, n_bins=10, alpha=0.05):
    """Score gene-metabolite spatial cross-correlation against a null, by bin.

    Steps, as implemented here:

    1. Metabolites are z-scored and given a spatial score
       ``sum_{edges} w_uv * x_u * x_v`` on a fully connected weighted graph
       built with ``_build_weighted_radius_graph``.
    2. Metabolites are split into up to ``n_bins`` quantile bins of that
       score (duplicate bin edges are merged; empty bins are dropped).
    3. Within each bin, ``SCI = X_gene_std.T @ (W @ X_met_std) / sum(W)`` is
       computed for the observed metabolites and for the same metabolites
       taken from ``mdata.uns['metabolite_null']``.
    4. Observed values are converted to two-sided empirical p-values against
       the bin's pooled null values, adjusted with ``bh_fdr``, and flagged
       significant when ``q < alpha``.

    Writes ``mdata.uns['genemet_sci']`` (one row per gene/metabolite pair,
    with columns ``bin_index`` (1-based), ``gene``, ``metabolite``, ``SCI``,
    ``p_value``, ``q_value``, ``significant``) and
    ``mdata.uns['genemet_sci_summary']`` (number of significant pairs per
    bin). Prints the number of significant pairs of each bin. Returns the
    same ``mdata`` object.

    Raises
    ------
    ValueError
        If the ``'metabolite'`` modality has no columns.
    KeyError
        If a required modality or ``uns['metabolite_null']`` is missing.
    """
    coords = mdata.obsm['spatial'].values
    met_df = mdata.mod['metabolite'].to_df()
    gene_df = mdata.mod['gene'].to_df()
    met_null_df = mdata.uns['metabolite_null'].to_df()

    X_all = np.asarray(met_df, dtype=float)
    X_rna = gene_df.to_numpy()
    met_names = met_df.columns.tolist()
    gene_names = gene_df.columns
    if X_all.shape[1] == 0:
        raise ValueError("mdata.mod['metabolite'] contains no metabolites.")

    # --- 1. spatial score per metabolite -------------------------------
    # Hard-coded length scale of the original implementation.
    # NOTE(review): sqrt(2) presumably reflects a unit grid with diagonal
    # neighbours — confirm against the expected coordinate spacing.
    grid_dmin = math.sqrt(2.0)
    G = _build_weighted_radius_graph(
        coords, l=4 * grid_dmin, radius=float("inf"), weight_key="w"
    )
    X_std = X_all.std(axis=0, ddof=0, keepdims=True)
    X_std[X_std == 0] = 1.0
    Xz = (X_all - X_all.mean(axis=0, keepdims=True)) / X_std
    M0 = np.asarray(_target_diag_M_from_matrix(G, Xz, weight_key="w"), dtype=float)

    # --- 2. quantile bins of the score ---------------------------------
    bin_of_met = _quantile_bin_index(M0, n_bins)
    obs_index = pd.Index(mdata.obs_names, name="spot_id")
    bin_dfs = {}
    for b in np.unique(bin_of_met):
        cols = np.flatnonzero(bin_of_met == b)
        bin_dfs[int(b) + 1] = pd.DataFrame(  # bins are reported 1-based
            X_all[:, cols],
            columns=[met_names[j] for j in cols],
            index=obs_index,
        )

    # --- 3a. observed SCI per bin --------------------------------------
    # NOTE(review): the observed SCI uses the fixed sqrt(2) length scale,
    # while the null SCI below uses the smallest non-zero distance found in
    # the data. Kept as in the original — confirm both are intended.
    W = _build_W_from_coords(coords, l=grid_dmin)
    S0 = float(W.sum())
    if S0 == 0:
        S0 = 1.0
    # Gene standardisation does not depend on the bin: do it once.
    X_rna_std = (X_rna - X_rna.mean(axis=0, keepdims=True)) / np.maximum(
        X_rna.std(axis=0, ddof=0, keepdims=True), 1e-12
    )
    genemet_sci_bins = {}
    for b, df_bin in bin_dfs.items():
        X_met = df_bin.to_numpy(dtype=float)
        X_met_std = (X_met - X_met.mean(axis=0, keepdims=True)) / np.maximum(
            X_met.std(axis=0, ddof=0, keepdims=True), 1e-12
        )
        SCI = (X_rna_std.T @ (W @ X_met_std)) / S0
        genemet_sci_bins[b] = pd.DataFrame(SCI, index=gene_names, columns=df_bin.columns)

    # --- 3b. null SCI per bin ------------------------------------------
    pair_dist = pdist(coords, metric="euclidean")
    positive = pair_dist[pair_dist > 0]
    dmin = float(positive.min()) if positive.size else np.finfo(float).eps
    W_null = _build_W_from_coords(coords, l=dmin)
    S0_null = float(W_null.sum())
    if S0_null == 0:
        S0_null = 1.0
    X_rna_std_null = _zscore_cols(X_rna)
    genemet_sci_bins_null = {}
    for b, df_bin in bin_dfs.items():
        cols_present = [c for c in df_bin.columns if c in met_null_df.columns]
        X_met_null_std = _zscore_cols(met_null_df[cols_present].to_numpy(dtype=float))
        SCI = (X_rna_std_null.T @ (W_null @ X_met_null_std)) / S0_null
        # Label with the columns actually computed; labelling with the full
        # bin would fail whenever the null lacks one of the metabolites.
        genemet_sci_bins_null[b] = pd.DataFrame(SCI, index=gene_names, columns=cols_present)

    # --- 4. empirical p-values, FDR, long table ------------------------
    frames = []
    for b, real_full in genemet_sci_bins.items():
        null_full = genemet_sci_bins_null[b]
        common_genes = real_full.index.intersection(null_full.index)
        common_mets = real_full.columns.intersection(null_full.columns)
        real_df = real_full.loc[common_genes, common_mets]
        null_df = null_full.loc[common_genes, common_mets]

        p_two = pooled_empirical_p_two(real_df.values, null_df.values)
        q_two = bh_fdr(p_two.ravel()).reshape(p_two.shape)
        sig_mask = (q_two < alpha)

        # Row-major flattening: genes vary slowest, metabolites fastest.
        n_genes, n_mets = real_df.shape
        res_df = pd.DataFrame({
            "gene": np.repeat(real_df.index.to_numpy(), n_mets),
            "metabolite": np.tile(real_df.columns.to_numpy(), n_genes),
            "SCI": real_df.to_numpy(dtype=float).ravel(),
            "p_value": p_two.ravel(),
            "q_value": q_two.ravel(),
            "significant": sig_mask.ravel(),
        })
        res_df.insert(0, "bin_index", b)
        # Progress output retained from the original implementation.
        print(int(sig_mask.sum()))
        frames.append(res_df)

    merged_eq = pd.concat(frames, ignore_index=True, sort=False)

    # Count only the pairs flagged significant, as the column name states.
    summary = (
        merged_eq.groupby("bin_index", dropna=False)["significant"]
        .sum()
        .astype(int)
        .reset_index(name="significant_pairs")
    )
    mdata.uns['genemet_sci'] = merged_eq
    mdata.uns['genemet_sci_summary'] = summary
    return mdata


def _quantile_bin_index(scores, n_bins):
    """Return a 0-based quantile-bin index for every entry of ``scores``."""
    quantiles = np.quantile(scores, np.linspace(0, 1, n_bins + 1), method="linear")
    # Keep strictly increasing edges only, so tied quantiles merge bins.
    edges = [quantiles[0]]
    for value in quantiles[1:]:
        if value > edges[-1]:
            edges.append(value)
    edges = np.asarray(edges, dtype=float)
    # When every score is identical there is a single edge: use one bin.
    n_effective = max(len(edges) - 1, 1)
    idx = np.searchsorted(edges, scores, side="right") - 1
    return np.clip(idx, 0, n_effective - 1)
|