sc-mantis 0.51__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sc_mantis-0.51/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 yuhaotuo
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,12 @@
1
+ Metadata-Version: 2.4
2
+ Name: sc-mantis
3
+ Version: 0.51
4
+ Summary: SCOUBI
5
+ Author-email: Yu Hao <yhao306@gatech.edu>, Bhavay Aggarwal <baggarwal9@gatech.edu>
6
+ License-Expression: MIT
7
+ Requires-Python: >=3.12
8
+ Description-Content-Type: text/markdown
9
+ License-File: LICENSE
10
+ Dynamic: license-file
11
+
12
+ # MANTIS
@@ -0,0 +1 @@
1
+ # MANTIS
@@ -0,0 +1,5 @@
1
+ from . import io
2
+ # from . import preprocess as pp
3
+ # from . import model as md
4
+ from . import tools as tl
5
+ # from . import plotting as pl
@@ -0,0 +1 @@
1
+ from ._io import load_data
@@ -0,0 +1,125 @@
1
+ import pandas as pd
2
+ import numpy as np
3
+ import anndata as ad
4
+ from mudata import MuData
5
+ from mudata import set_options
6
+ import mudata as mu
7
+ import os
8
+ import types
9
+
10
+ def _summarize(self):
11
+ """Structured summary of MuData with grouped .uns fields."""
12
+ print(f"MuData object with n_obs × n_vars = {self.n_obs} × {self.n_vars}")
13
+ print("obs:", ", ".join(self.obs_keys()))
14
+ print("obsm:", ", ".join(self.obsm_keys()))
15
+ print("uns:")
16
+
17
+ # --- Predefined grouping map ---
18
+ group_map = {
19
+ 'cell_type': ['celltype_metabolite', 'celltype_metabolite_per_ct'],
20
+ 'region': ['regional_metabolite', 'regional_metabolite_fit'],
21
+ 'spatvar': ['spatvar_metabolite', 'spatvar_metabolite_celltype',
22
+ 'spatvar_metabolite_celltype_coef', 'spatvar_metabolite_celltype_summary',
23
+ 'spatvar_metabolite_combined', 'spatvar_metabolite_region'],
24
+ 'genemet': ['genemet_sci', 'genemet_sci_summary'],
25
+ 'misc': [],
26
+ }
27
+
28
+ # flatten group map for lookup
29
+ assigned = set(k for keys in group_map.values() for k in keys)
30
+ uns_keys = set(self.uns.keys())
31
+ ungrouped = sorted(uns_keys - assigned)
32
+
33
+ # print present keys per group
34
+ for group, keys in group_map.items():
35
+ present = [k for k in keys if k in self.uns]
36
+ if present:
37
+ print(f" {group:<22}: {', '.join(present)}")
38
+
39
+ if ungrouped:
40
+ print(f" {'other':<22}: {', '.join(ungrouped)}")
41
+
42
+
43
def _attach_summarize(mdata):
    """Bind the module-level ``_summarize`` as an instance-only ``summarize()``
    method on this MuData object (in memory; nothing is persisted)."""
    bound_method = types.MethodType(_summarize, mdata)
    mdata.summarize = bound_method
    return mdata
47
+
48
def load_data(
    g_file=None,     # CSV of gene expression (cells × genes)
    m_file=None,     # CSV of metabolite abundances (cells × metabolites)
    coords=None,     # CSV of spatial coordinates
    cell_type=None,  # CSV of cell type labels
    region=None,     # CSV of region labels
    mdata=None       # existing .h5mu file path
):
    """
    Load or build a MuData object containing multi-omic and spatial data.

    If `mdata` is provided, loads it directly and ignores all other inputs.
    Otherwise, builds a MuData object from the provided CSV files; cells are
    intersected across all supplied tables (coords is mandatory).

    Returns
    -------
    MuData with modalities in {'gene', 'metabolite'}, spatial coordinates in
    .obsm['spatial'], optional labels in .obsm, and a bound summarize() method.

    Raises
    ------
    FileNotFoundError : if `mdata` points to a missing file.
    ValueError : if coords is missing, no omic table is given, or no common
        cell indices exist.
    """
    # FIX: the docstring used to appear *after* this call, so it was a plain
    # string expression and help(load_data) showed nothing.
    set_options(pull_on_update=False)

    if mdata is not None:
        if not os.path.exists(mdata):
            raise FileNotFoundError(f"File not found: {mdata}")
        mdata_obj = mu.read_h5mu(mdata)
        mdata_obj = _attach_summarize(mdata_obj)
        return mdata_obj

    if coords is None:
        raise ValueError("Spatial coordinate file (coords) must be provided.")
    coords_df = pd.read_csv(coords, index_col=0)

    gene_df = pd.read_csv(g_file, index_col=0) if g_file else None
    met_df = pd.read_csv(m_file, index_col=0) if m_file else None

    # intersect cell indices across every supplied table
    common_idx = coords_df.index
    if gene_df is not None:
        common_idx = common_idx.intersection(gene_df.index)
    if met_df is not None:
        common_idx = common_idx.intersection(met_df.index)

    if len(common_idx) == 0:
        raise ValueError("No common cell indices found among provided datasets.")

    coords_df = coords_df.loc[common_idx]
    if gene_df is not None:
        gene_df = gene_df.loc[common_idx]
    if met_df is not None:
        met_df = met_df.loc[common_idx]

    adatas = {}
    if gene_df is not None:
        adatas["gene"] = ad.AnnData(
            X=gene_df.to_numpy(dtype=float),
            obs=pd.DataFrame(index=common_idx),
            var=pd.DataFrame(index=gene_df.columns)
        )
    if met_df is not None:
        adatas["metabolite"] = ad.AnnData(
            X=met_df.to_numpy(dtype=float),
            obs=pd.DataFrame(index=common_idx),
            var=pd.DataFrame(index=met_df.columns)
        )

    if not adatas:
        raise ValueError("No omic data provided. Must provide at least one of g_file or m_file.")

    mdata_obj = MuData(adatas)

    obs_meta = pd.DataFrame(index=common_idx)
    if cell_type is not None:
        celltype_df = pd.read_csv(cell_type, index_col=0)
        # FIX: squeeze a single-column frame into a Series; assigning a whole
        # DataFrame to one column can raise / misalign in some pandas versions.
        obs_meta["cell_type"] = celltype_df.loc[common_idx].squeeze(axis=1)
    if region is not None:
        region_df = pd.read_csv(region, index_col=0)
        obs_meta["region"] = region_df.loc[common_idx].squeeze(axis=1)
    mdata_obj.obsm = obs_meta

    mdata_obj.obsm["spatial"] = coords_df
    mdata_obj = _attach_summarize(mdata_obj)
    # also patch the class so every MuData instance gets summarize()
    mu.MuData.summarize = _summarize
    return mdata_obj
@@ -0,0 +1,6 @@
1
+ from ._sampler import sample
2
+ from ._region import compute_regional_metabolite
3
+ from ._celltype import celltype_met
4
+ from ._space import spatvar_metabolite
5
+ from ._genemet import compute_genemet_sci
6
+ from ._spc import compute_spc_ct, compute_spc_sd
@@ -0,0 +1,129 @@
1
+ import pandas as pd
2
+ import numpy as np
3
+ import matplotlib.pyplot as plt
4
+ import numpy as np
5
+ import pandas as pd
6
+ from scipy.spatial.distance import pdist, squareform
7
+
8
+ def _bh_fdr_rowwise(p_mat: np.ndarray):
9
+ p = np.asarray(p_mat, dtype=float)
10
+ K, M = p.shape
11
+ q = np.empty_like(p)
12
+ for i in range(K):
13
+ pi = p[i].copy()
14
+ order = np.argsort(pi, kind="mergesort")
15
+ ranks = np.arange(1, M + 1, dtype=float)
16
+ qi = pi[order] * M / ranks
17
+ qi = np.minimum.accumulate(qi[::-1])[::-1]
18
+ out = np.empty_like(qi)
19
+ out[order] = qi
20
+ q[i] = np.clip(out, 0, 1)
21
+ return q
22
+
23
def _significant_by_ct_single_null(
    SCI_real: pd.DataFrame,
    SCI_null: pd.DataFrame,
    alpha: float = 0.05,
    two_sided: bool = True,
):
    """Empirical significance of real SCI values against one null SCI matrix.

    Rows are cell types, columns are metabolites; only the row/column
    intersection of the two frames is tested. Each real value is compared
    against the full null row of the same cell type with add-one smoothing,
    then BH-corrected per row.

    Returns
    -------
    long_df : tidy DataFrame with columns
        cell_type, metabolite, sci, p_emp, q_bh, significant
    per_ct : dict mapping cell type -> its significant rows, sorted by
        (q_bh, p_emp, sci)
    """
    rows = SCI_real.index.intersection(SCI_null.index)
    cols = SCI_real.columns.intersection(SCI_null.columns)
    R = SCI_real.loc[rows, cols].astype(float)
    N = SCI_null.loc[rows, cols].astype(float)

    K, M = R.shape
    real_vals = R.to_numpy()  # K×M
    null_vals = N.to_numpy()  # K×M

    # two-sided tests compare magnitudes
    if two_sided:
        real_use = np.abs(real_vals)
        null_use = np.abs(null_vals)
    else:
        real_use = real_vals
        null_use = null_vals

    # empirical p with add-one smoothing against the same-row null values
    p_emp = np.empty_like(real_vals, dtype=float)
    for i in range(K):
        base = null_use[i]
        for j in range(M):
            p_emp[i, j] = (np.sum(base >= real_use[i, j]) + 1.0) / (M + 1.0)

    q_bh = _bh_fdr_rowwise(p_emp)

    long = []
    for i, ct in enumerate(rows):
        for j, met in enumerate(cols):
            long.append({
                "cell_type": ct,
                "metabolite": met,
                "sci": float(real_vals[i, j]),
                "p_emp": float(p_emp[i, j]),
                "q_bh": float(q_bh[i, j]),
            })
    long_df = pd.DataFrame(long)
    long_df["significant"] = long_df["q_bh"] < alpha

    # FIX: removed the dead `sig_rows` accumulator (built but never used)
    per_ct = {}
    for ct, sub in long_df.groupby("cell_type"):
        per_ct[ct] = sub[sub["significant"]].sort_values(["q_bh", "p_emp", "sci"])
    return long_df, per_ct
76
+
77
+ def _zscore_cols(A):
78
+ mu = A.mean(axis=0, keepdims=True)
79
+ sd = A.std(axis=0, ddof=0, keepdims=True)
80
+ sd = np.where(sd == 0, 1.0, sd)
81
+ return (A - mu) / sd
82
+
83
+ def _build_W(xy, l="dmin", norm="row"):
84
+ D = squareform(pdist(xy, metric="euclidean"))
85
+ d_nonzero = D[D > 0]
86
+ if isinstance(l, str) and l == "dmin":
87
+ if d_nonzero.size == 0:
88
+ raise ValueError("there is no dmin.")
89
+ lval = float(np.min(d_nonzero))
90
+ else:
91
+ lval = float(l)
92
+ W = np.exp(-(D**2) / (2.0 * lval * lval))
93
+ np.fill_diagonal(W, 1.0)
94
+ if norm == "row":
95
+ W = W / W.sum(axis=1, keepdims=True)
96
+ elif norm == "global":
97
+ W = W / W.sum()
98
+ return W
99
+
100
def _compute_sci(Ydf, Cdf, xy, normalized=True, l="dmin", w_norm="row"):
    """Spatial cross-correlation index between columns of Cdf and Ydf.

    Y is spatially smoothed with the weight matrix from `xy`, then crossed
    with C. Returns a DataFrame indexed by Cdf columns × Ydf columns; with
    `normalized=True` entries are scaled like correlation coefficients.
    """
    W = _build_W(xy, l=l, norm=w_norm)
    C_std = _zscore_cols(Cdf.to_numpy())
    Y_std = _zscore_cols(Ydf.to_numpy())

    smoothed_Y = W @ Y_std
    raw = C_std.T @ smoothed_Y

    if not normalized:
        return pd.DataFrame(raw, index=Cdf.columns, columns=Ydf.columns)

    # correlation-style denominator: sqrt(sum C^2 * sum Y^2) per pair
    c_energy = np.sum(C_std ** 2, axis=0)
    y_energy = np.sum(Y_std ** 2, axis=0)
    denom = np.sqrt(np.outer(c_energy, y_energy))
    denom = np.where(denom == 0, 1.0, denom)
    return pd.DataFrame(raw / denom, index=Cdf.columns, columns=Ydf.columns)
118
+
119
def celltype_met(mdata, alpha = 0.05, two_sided = True):
    """Score each (cell type, metabolite) pair with an SCI and test it against
    the stored null layer mdata.uns['metabolite_null'].

    Writes the long results table to mdata.uns['celltype_met'] and the
    per-cell-type significant subsets to mdata.uns['celltype_met_per_ct'];
    returns the mutated mdata.
    """
    coords = mdata.obsm['spatial'].values
    ct_weights = mdata.obsm['cell_type']
    sci_real = _compute_sci(mdata.mod['metabolite'].to_df(), ct_weights, coords, normalized=True)
    sci_null = _compute_sci(mdata.uns['metabolite_null'].to_df(), ct_weights, coords, normalized=True)
    long_df, per_ct = _significant_by_ct_single_null(
        sci_real, sci_null,
        alpha=alpha,
        two_sided=two_sided
    )
    mdata.uns['celltype_met'] = long_df
    mdata.uns['celltype_met_per_ct'] = per_ct
    return mdata
@@ -0,0 +1,226 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ from pathlib import Path
4
+ from scipy.spatial.distance import pdist, squareform
5
+ import math
6
+ from scipy.spatial import cKDTree
7
+ import matplotlib.pyplot as plt
8
+ import networkx as nx
9
+ import re
10
+
11
def _build_weighted_radius_graph(coords, radius=None, l=None, weight_key="w"):
    """Undirected weighted graph over points within `radius` of each other.

    Edges at distance <= l get weight 1.0; farther edges decay with a
    Gaussian of bandwidth l. When only one of radius / l is given, the other
    defaults to radius = 2*l (resp. l = radius).
    """
    if radius is None and l is None:
        raise ValueError("radius or l should be supplied")
    if l is None:
        l = float(radius)
    if radius is None:
        radius = 2.0 * float(l)

    kd = cKDTree(coords)
    sparse = kd.sparse_distance_matrix(kd, max_distance=radius, output_type="coo_matrix")
    graph = nx.Graph()
    graph.add_nodes_from(range(coords.shape[0]))
    gauss_scale = 1.0 / (2.0 * float(l) * float(l))

    for u, v, dist in zip(sparse.row, sparse.col, sparse.data):
        # each pair appears twice in the symmetric matrix; keep u < v only
        if u >= v:
            continue
        weight = 1.0 if dist <= l else float(np.exp(-(dist * dist) * gauss_scale))
        if weight > 0.0:
            graph.add_edge(u, v, **{weight_key: weight})
    return graph
35
+
36
+ def _target_diag_M_from_matrix(G, X_mat, weight_key="w"):
37
+ M = X_mat.shape[1]
38
+ Mt = np.zeros(M, dtype=float)
39
+ for u, v, data in G.edges(data=True):
40
+ w = data.get(weight_key, 1.0)
41
+ Mt += w * X_mat[u] * X_mat[v]
42
+ return Mt
43
+
44
+ def _build_W_from_coords(coords_xy, l):
45
+ D_condensed = pdist(coords_xy, metric="euclidean")
46
+ dist = squareform(D_condensed)
47
+ W = np.zeros_like(dist, dtype=float)
48
+
49
+ mask = dist > 0
50
+ W[mask] = np.exp(-(dist[mask]**2) / (2.0 * l**2))
51
+ np.fill_diagonal(W, 1.0)
52
+
53
+ row_sums = W.sum(axis=1, keepdims=True)
54
+ row_sums[row_sums == 0.0] = 1.0
55
+ W /= row_sums
56
+ return W
57
+
58
+ def _zscore_cols(A: np.ndarray) -> np.ndarray:
59
+ mu = A.mean(axis=0, keepdims=True)
60
+ sd = A.std(axis=0, ddof=0, keepdims=True)
61
+ sd[sd == 0.0] = 1.0
62
+ return (A - mu) / sd
63
+
64
def bh_fdr(p_values_1d):
    """Benjamini–Hochberg adjusted q-values for a flat array of p-values.

    Returns q-values in the input order, capped at 1.0 and monotone in the
    p-value ranking.
    """
    p = np.asarray(p_values_1d, dtype=float)
    n = p.size
    order = np.argsort(p)
    scaled = p[order] * n / np.arange(1, n + 1)
    # running minimum from the right enforces monotone q-values; the outer
    # minimum reproduces the 1.0 cap of the step-down recursion
    adjusted = np.minimum(np.minimum.accumulate(scaled[::-1])[::-1], 1.0)
    q = np.empty_like(adjusted)
    q[order] = adjusted
    return q
82
+
83
def pooled_empirical_p_two(real_mat, null_mat):
    """Two-sided pooled empirical p-values.

    Every real value is ranked within the pooled (flattened, finite) null
    distribution with add-one smoothing; the two-sided p is twice the smaller
    tail, capped at 1. Returns an array shaped like `real_mat`.
    """
    real = np.ravel(real_mat)
    pool = np.ravel(null_mat)
    pool = np.sort(pool[np.isfinite(pool)])
    n_null = pool.size

    n_le = np.searchsorted(pool, real, side='right')           # nulls <= real
    n_ge = n_null - np.searchsorted(pool, real, side='left')   # nulls >= real

    p_left = (n_le + 1) / (n_null + 1)
    p_right = (n_ge + 1) / (n_null + 1)

    p_two = np.minimum(2 * np.minimum(p_left, p_right), 1.0)
    return p_two.reshape(np.shape(real_mat))
101
+
102
def compute_genemet_sci(mdata, n_bins=10, alpha=0.05):
    """Gene–metabolite spatial cross-correlation (SCI), binned by metabolite
    spatial self-structure.

    Pipeline
    --------
    1. Score each metabolite's spatial autocorrelation on a weighted radius
       graph. NOTE(review): assumes roughly unit grid spacing — dmin is
       hard-coded to sqrt(2) for this step; confirm for non-grid data.
    2. Split metabolites into up to `n_bins` quantile bins of that score.
    3. Per bin, compute gene × metabolite SCI for the real data and for the
       stored null layer mdata.uns['metabolite_null'] (the null bandwidth is
       the observed minimum pairwise distance).
    4. Pooled two-sided empirical p-values + BH FDR at `alpha`.

    Writes mdata.uns['genemet_sci'] (long table with bin_index) and
    mdata.uns['genemet_sci_summary'] (significant pairs per bin); returns the
    mutated mdata.
    """
    coords = mdata.obsm['spatial'].values

    # --- metabolite spatial self-similarity score --------------------------
    dmin = math.sqrt(2.0)
    G = _build_weighted_radius_graph(coords, l=4 * dmin, radius=float("inf"), weight_key="w")
    X_df = mdata.mod['metabolite'].to_df()
    X_all = np.asarray(X_df, dtype=float)
    X_rna = mdata.mod['gene'].to_df().to_numpy()
    met_names = X_df.columns.tolist()

    X_std = X_all.std(axis=0, ddof=0, keepdims=True)
    X_std[X_std == 0] = 1.0
    Xz = (X_all - X_all.mean(axis=0, keepdims=True)) / X_std
    M0 = np.asarray(_target_diag_M_from_matrix(G, Xz, weight_key="w"), dtype=float)

    # --- quantile bin edges, deduplicated to stay strictly increasing ------
    bin_edges = np.quantile(M0, np.linspace(0, 1, n_bins + 1), method="linear")
    edges = [bin_edges[0]]
    for v in bin_edges[1:]:
        if v > edges[-1]:
            edges.append(v)
    bin_edges = np.array(edges, dtype=float)
    k = len(bin_edges) - 1

    idx = np.clip(np.searchsorted(bin_edges, M0, side="right") - 1, 0, k - 1)
    bin_mets = [[] for _ in range(k)]
    for name, b in zip(met_names, idx):
        bin_mets[b].append(name)

    # FIX: dropped the unused `bin_counts` accumulator and the unused
    # `left, right` edge locals of the original.
    name_to_idx = {name: i for i, name in enumerate(met_names)}
    bin_dfs = {}
    for i, mets in enumerate(bin_mets):
        cols = [name_to_idx[m] for m in mets if m in name_to_idx]
        if cols:
            bin_dfs[i + 1] = pd.DataFrame(
                X_all[:, cols],
                columns=[met_names[j] for j in cols],
                index=pd.Index(mdata.obs_names, name="spot_id"),
            )

    # --- real SCI per bin --------------------------------------------------
    W = _build_W_from_coords(coords, l=dmin)
    S0 = float(W.sum())
    if S0 == 0:
        S0 = 1.0
    gene_names = mdata.mod['gene'].to_df().columns
    # hoisted out of the loop: RNA standardization is bin-invariant
    X_rna_std = (X_rna - X_rna.mean(axis=0, keepdims=True)) / np.maximum(
        X_rna.std(axis=0, ddof=0, keepdims=True), 1e-12)
    genemet_sci_bins = {}
    for i in bin_dfs:
        X_met = bin_dfs[i].to_numpy(dtype=float)
        X_met_std = (X_met - X_met.mean(axis=0, keepdims=True)) / np.maximum(
            X_met.std(axis=0, ddof=0, keepdims=True), 1e-12)
        # FIX: the original computed WY twice (W.dot then W @); once suffices
        SCI = (X_rna_std.T @ (W @ X_met_std)) / S0
        genemet_sci_bins[i] = pd.DataFrame(SCI, index=gene_names, columns=bin_dfs[i].columns)

    # --- null SCI per bin (bandwidth from observed minimum distance) -------
    X_met_null_df = mdata.uns['metabolite_null'].to_df()
    D = squareform(pdist(coords, metric="euclidean"))
    pos = D > 0
    dmin = float(D[pos].min()) if np.any(pos) else np.finfo(float).eps
    W = _build_W_from_coords(coords, l=dmin)
    S0 = float(W.sum())
    if S0 == 0:
        S0 = 1.0
    X_rna_z = _zscore_cols(X_rna)
    genemet_sci_bins_null = {}
    for i in bin_dfs:
        cols_present = [c for c in bin_dfs[i].columns if c in X_met_null_df.columns]
        X_met_null_std = _zscore_cols(X_met_null_df[cols_present].to_numpy(dtype=float))
        SCI = (X_rna_z.T @ (W @ X_met_null_std)) / S0
        genemet_sci_bins_null[i] = pd.DataFrame(SCI, index=gene_names, columns=bin_dfs[i].columns)

    # --- significance per bin ----------------------------------------------
    results = {}
    for n in genemet_sci_bins:
        common_genes = genemet_sci_bins[n].index.intersection(genemet_sci_bins_null[n].index)
        common_mets = genemet_sci_bins[n].columns.intersection(genemet_sci_bins_null[n].columns)
        real_df = genemet_sci_bins[n].loc[common_genes, common_mets]
        null_df = genemet_sci_bins_null[n].loc[common_genes, common_mets]
        p_two = pooled_empirical_p_two(real_df.values, null_df.values)
        q_two = bh_fdr(p_two.ravel()).reshape(p_two.shape)
        sig_mask = (q_two < alpha)

        rows = []
        for gi, g in enumerate(real_df.index):
            for mj, m in enumerate(real_df.columns):
                rows.append({
                    "gene": g,
                    "metabolite": m,
                    "SCI": float(real_df.iloc[gi, mj]),
                    "p_value": float(p_two[gi, mj]),
                    "q_value": float(q_two[gi, mj]),
                    "significant": bool(sig_mask[gi, mj]),
                })
        # FIX: removed the stray debug print of the significant count
        results[n] = pd.DataFrame(rows)

    # --- merge + summary ----------------------------------------------------
    dfs = []
    for i in results:
        results[i]["bin_index"] = i
        dfs.append(results[i])
    merged_eq = pd.concat(dfs, ignore_index=True, sort=False)
    meta_cols = ["method", "bin_index", "bin_min", "bin_max", "bin_label", "source_file"]
    front = [c for c in meta_cols if c in merged_eq.columns]
    rest = [c for c in merged_eq.columns if c not in front]
    merged_eq = merged_eq[front + rest]

    # FIX: the summary previously counted *all* pairs per bin while naming the
    # column "significant_pairs"; count only rows with significant == True.
    summary = (
        merged_eq[merged_eq["significant"]]
        .groupby(["bin_index"], dropna=False)
        .size().reset_index(name="significant_pairs")
    )
    mdata.uns['genemet_sci'] = merged_eq
    mdata.uns['genemet_sci_summary'] = summary
    return mdata