gsMap 1.71.1__py3-none-any.whl → 1.72.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gsMap/GNN/__init__.py +0 -0
- gsMap/GNN/adjacency_matrix.py +73 -75
- gsMap/GNN/model.py +92 -90
- gsMap/GNN/train.py +8 -11
- gsMap/__init__.py +5 -5
- gsMap/__main__.py +4 -3
- gsMap/cauchy_combination_test.py +144 -141
- gsMap/config.py +1312 -805
- gsMap/create_slice_mean.py +154 -0
- gsMap/diagnosis.py +352 -273
- gsMap/find_latent_representation.py +141 -133
- gsMap/format_sumstats.py +439 -407
- gsMap/generate_ldscore.py +762 -618
- gsMap/latent_to_gene.py +284 -234
- gsMap/main.py +40 -31
- gsMap/report.py +174 -160
- gsMap/run_all_mode.py +235 -195
- gsMap/setup.py +1 -1
- gsMap/spatial_ldsc_multiple_sumstats.py +434 -380
- gsMap/templates/report_template.html +198 -198
- gsMap/utils/__init__.py +0 -0
- gsMap/utils/generate_r2_matrix.py +768 -735
- gsMap/utils/jackknife.py +518 -514
- gsMap/utils/manhattan_plot.py +612 -639
- gsMap/utils/regression_read.py +277 -294
- gsMap/visualize.py +217 -199
- {gsmap-1.71.1.dist-info → gsmap-1.72.3.dist-info}/LICENSE +21 -21
- {gsmap-1.71.1.dist-info → gsmap-1.72.3.dist-info}/METADATA +23 -8
- gsmap-1.72.3.dist-info/RECORD +31 -0
- {gsmap-1.71.1.dist-info → gsmap-1.72.3.dist-info}/WHEEL +1 -1
- gsMap/utils/make_annotations.py +0 -518
- gsmap-1.71.1.dist-info/RECORD +0 -31
- {gsmap-1.71.1.dist-info → gsmap-1.72.3.dist-info}/entry_points.txt +0 -0
gsMap/latent_to_gene.py
CHANGED
@@ -1,234 +1,284 @@
|
|
1
|
-
import logging
|
2
|
-
from pathlib import Path
|
3
|
-
|
4
|
-
import numpy as np
|
5
|
-
import pandas as pd
|
6
|
-
import scanpy as sc
|
7
|
-
|
8
|
-
from scipy.stats import rankdata
|
9
|
-
from sklearn.metrics.pairwise import cosine_similarity
|
10
|
-
from sklearn.neighbors import NearestNeighbors
|
11
|
-
from tqdm import tqdm
|
12
|
-
|
13
|
-
from gsMap.config import LatentToGeneConfig
|
14
|
-
|
15
|
-
logger = logging.getLogger(__name__)
|
16
|
-
|
17
|
-
|
18
|
-
def find_neighbors(coor, num_neighbour):
|
19
|
-
"""
|
20
|
-
Find Neighbors of each cell (based on spatial coordinates).
|
21
|
-
"""
|
22
|
-
nbrs = NearestNeighbors(n_neighbors=num_neighbour).fit(coor)
|
23
|
-
distances, indices = nbrs.kneighbors(coor, return_distance=True)
|
24
|
-
cell_indices = np.arange(coor.shape[0])
|
25
|
-
cell1 = np.repeat(cell_indices, indices.shape[1])
|
26
|
-
cell2 = indices.flatten()
|
27
|
-
distance = distances.flatten()
|
28
|
-
spatial_net = pd.DataFrame({
|
29
|
-
return spatial_net
|
30
|
-
|
31
|
-
|
32
|
-
def build_spatial_net(adata, annotation, num_neighbour):
|
33
|
-
"""
|
34
|
-
Build spatial neighbourhood matrix for each spot (cell) based on the spatial coordinates.
|
35
|
-
"""
|
36
|
-
logger.info(
|
37
|
-
|
38
|
-
coor = adata.obsm[
|
39
|
-
if annotation is not None:
|
40
|
-
logger.info(
|
41
|
-
spatial_net_list = []
|
42
|
-
# Cells with annotations
|
43
|
-
for ct in adata.obs[annotation].dropna().unique():
|
44
|
-
idx = np.where(adata.obs[annotation] == ct)[0]
|
45
|
-
coor_temp = coor[idx, :]
|
46
|
-
spatial_net_temp = find_neighbors(coor_temp, min(num_neighbour, coor_temp.shape[0]))
|
47
|
-
# Map back to original indices
|
48
|
-
spatial_net_temp[
|
49
|
-
spatial_net_temp[
|
50
|
-
spatial_net_list.append(spatial_net_temp)
|
51
|
-
logger.info(f
|
52
|
-
|
53
|
-
# Cells labeled as nan
|
54
|
-
if pd.isnull(adata.obs[annotation]).any():
|
55
|
-
idx_nan = np.where(pd.isnull(adata.obs[annotation]))[0]
|
56
|
-
logger.info(f
|
57
|
-
spatial_net_temp = find_neighbors(coor, num_neighbour)
|
58
|
-
spatial_net_temp = spatial_net_temp[spatial_net_temp[
|
59
|
-
spatial_net_list.append(spatial_net_temp)
|
60
|
-
spatial_net = pd.concat(spatial_net_list, axis=0)
|
61
|
-
else:
|
62
|
-
logger.info(
|
63
|
-
spatial_net = find_neighbors(coor, num_neighbour)
|
64
|
-
|
65
|
-
return spatial_net
|
66
|
-
|
67
|
-
|
68
|
-
def find_neighbors_regional(cell_pos, spatial_net_dict, coor_latent, config, cell_annotations):
|
69
|
-
num_neighbour = config.num_neighbour
|
70
|
-
annotations = config.annotation
|
71
|
-
|
72
|
-
cell_use_pos = spatial_net_dict.get(cell_pos, [])
|
73
|
-
if len(cell_use_pos) == 0:
|
74
|
-
return []
|
75
|
-
|
76
|
-
cell_latent = coor_latent[cell_pos, :].reshape(1, -1)
|
77
|
-
neighbors_latent = coor_latent[cell_use_pos, :]
|
78
|
-
similarity = cosine_similarity(cell_latent, neighbors_latent).reshape(-1)
|
79
|
-
|
80
|
-
if annotations is not None:
|
81
|
-
cell_annotation = cell_annotations[cell_pos]
|
82
|
-
neighbor_annotations = cell_annotations[cell_use_pos]
|
83
|
-
mask = neighbor_annotations == cell_annotation
|
84
|
-
if not np.any(mask):
|
85
|
-
return []
|
86
|
-
similarity = similarity[mask]
|
87
|
-
cell_use_pos = cell_use_pos[mask]
|
88
|
-
|
89
|
-
if len(similarity) == 0:
|
90
|
-
return []
|
91
|
-
|
92
|
-
indices = np.argsort(-similarity) # descending order
|
93
|
-
top_indices = indices[:num_neighbour]
|
94
|
-
cell_select_pos = cell_use_pos[top_indices]
|
95
|
-
return cell_select_pos
|
96
|
-
|
97
|
-
|
98
|
-
def compute_regional_mkscore(
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
if
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
#
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
1
|
+
import logging
|
2
|
+
from pathlib import Path
|
3
|
+
|
4
|
+
import numpy as np
|
5
|
+
import pandas as pd
|
6
|
+
import scanpy as sc
|
7
|
+
import scipy
|
8
|
+
from scipy.stats import gmean, rankdata
|
9
|
+
from sklearn.metrics.pairwise import cosine_similarity
|
10
|
+
from sklearn.neighbors import NearestNeighbors
|
11
|
+
from tqdm import tqdm, trange
|
12
|
+
|
13
|
+
from gsMap.config import LatentToGeneConfig
|
14
|
+
|
15
|
+
# Module-level logger named after this module; used for all progress messages below.
logger = logging.getLogger(__name__)
|
16
|
+
|
17
|
+
|
18
|
+
def find_neighbors(coor, num_neighbour):
    """
    Build a long-format k-nearest-neighbour table from spatial coordinates.

    A ``NearestNeighbors`` index is fitted on ``coor`` and then queried with
    the very same points.

    Args:
        coor: 2-D coordinate array with one row per cell.
        num_neighbour: number of neighbours requested for every cell.

    Returns:
        A ``pandas.DataFrame`` with one row per (cell, neighbour) pair and the
        columns ``Cell1`` (query row index), ``Cell2`` (neighbour row index)
        and ``Distance``.
    """
    knn = NearestNeighbors(n_neighbors=num_neighbour)
    knn.fit(coor)
    dist_mat, nbr_mat = knn.kneighbors(coor, return_distance=True)

    # Every query row is repeated once per neighbour column so that the three
    # flattened columns line up element by element.
    n_cells, k = coor.shape[0], nbr_mat.shape[1]
    return pd.DataFrame(
        {
            "Cell1": np.repeat(np.arange(n_cells), k),
            "Cell2": nbr_mat.flatten(),
            "Distance": dist_mat.flatten(),
        }
    )
|
30
|
+
|
31
|
+
|
32
|
+
def build_spatial_net(adata, annotation, num_neighbour):
    """
    Build the spatial neighbourhood of every spot (cell) from its spatial coordinates.

    Args:
        adata: AnnData-like object; ``adata.obsm["spatial"]`` supplies the
            coordinates and ``adata.obs[annotation]`` the optional labels.
        annotation: name of the ``adata.obs`` column holding cell annotations,
            or ``None`` to ignore annotations.
        num_neighbour: number of spatial neighbours requested per cell. It is
            capped at the number of cells actually available to search.

    Returns:
        dict mapping the positional index of each cell (``Cell1``) to a NumPy
        array with the positional indices of its spatial neighbours (``Cell2``).

    When ``annotation`` is given, neighbours of a labelled cell are searched
    only among cells carrying the same label, while cells whose label is null
    are searched against all cells.
    """
    logger.info("------Building spatial graph based on spatial coordinates...")

    coor = adata.obsm["spatial"]
    # The nearest-neighbour search cannot return more neighbours than there are
    # points, so cap the request for the searches that run on all cells (the
    # per-annotation search below applies the same cap per group).
    k_all = min(num_neighbour, coor.shape[0])

    if annotation is not None:
        logger.info("Cell annotations are provided...")
        labels = adata.obs[annotation]
        spatial_net_list = []
        # Cells with annotations
        for ct in labels.dropna().unique():
            idx = np.where(labels == ct)[0]
            coor_temp = coor[idx, :]
            spatial_net_temp = find_neighbors(coor_temp, min(num_neighbour, coor_temp.shape[0]))
            # Map back to original indices
            spatial_net_temp["Cell1"] = idx[spatial_net_temp["Cell1"].values]
            spatial_net_temp["Cell2"] = idx[spatial_net_temp["Cell2"].values]
            spatial_net_list.append(spatial_net_temp)
            logger.info(f"{ct}: {coor_temp.shape[0]} cells")

        # Cells labeled as nan
        null_mask = pd.isnull(labels)
        if null_mask.any():
            idx_nan = np.where(null_mask)[0]
            logger.info(f"Nan: {len(idx_nan)} cells")
            spatial_net_temp = find_neighbors(coor, k_all)
            # Keep only the rows whose query cell is one of the unlabelled cells.
            spatial_net_temp = spatial_net_temp[spatial_net_temp["Cell1"].isin(idx_nan)]
            spatial_net_list.append(spatial_net_temp)
        spatial_net = pd.concat(spatial_net_list, axis=0)
    else:
        logger.info("Cell annotations are not provided...")
        spatial_net = find_neighbors(coor, k_all)

    # Collapse the long (Cell1, Cell2) table into {cell: array_of_neighbours}.
    return spatial_net.groupby("Cell1")["Cell2"].apply(np.array).to_dict()
|
66
|
+
|
67
|
+
|
68
|
+
def find_neighbors_regional(cell_pos, spatial_net_dict, coor_latent, config, cell_annotations):
    """
    Pick, among a cell's spatial neighbours, those closest to it in latent space.

    Args:
        cell_pos: positional index of the focal cell.
        spatial_net_dict: mapping from cell index to an array of spatial
            neighbour indices.
        coor_latent: 2-D array of latent embeddings, indexed by cell position.
        config: object exposing ``num_neighbour`` (how many neighbours to keep)
            and ``annotation`` (``None`` disables the same-label filter).
        cell_annotations: array of per-cell labels, only read when
            ``config.annotation`` is not ``None``.

    Returns:
        The neighbour indices ordered by decreasing cosine similarity to the
        focal cell, truncated to ``config.num_neighbour`` entries; an empty
        list when the cell has no usable neighbour.
    """
    top_k, annotation_key = config.num_neighbour, config.annotation

    candidates = spatial_net_dict.get(cell_pos, [])
    if len(candidates) == 0:
        return []

    focal_vec = coor_latent[cell_pos, :].reshape(1, -1)
    scores = cosine_similarity(focal_vec, coor_latent[candidates, :]).reshape(-1)

    if annotation_key is not None:
        # Restrict the candidates to cells sharing the focal cell's label.
        focal_label = cell_annotations[cell_pos]
        same_label = cell_annotations[candidates] == focal_label
        if not np.any(same_label):
            return []
        scores, candidates = scores[same_label], candidates[same_label]

    if len(scores) == 0:
        return []

    # Negating the scores makes argsort yield a descending ordering.
    order = np.argsort(-scores)
    return candidates[order[:top_k]]
|
96
|
+
|
97
|
+
|
98
|
+
def compute_regional_mkscore(
    cell_pos,
    spatial_net_dict,
    coor_latent,
    config,
    cell_annotations,
    ranks,
    frac_whole,
    adata_X_bool,
):
    """
    Compute the marker score of every gene for the region around one cell.

    The region is the set of cells returned by ``find_neighbors_regional`` for
    ``cell_pos``.

    Args:
        cell_pos: positional index of the focal cell.
        spatial_net_dict: mapping from cell index to spatial neighbour indices.
        coor_latent: latent embeddings, indexed by cell position.
        config: object exposing ``no_expression_fraction`` (plus the attributes
            read by ``find_neighbors_regional``).
        cell_annotations: per-cell labels or ``None``.
        ranks: 2-D array indexed ``[cell, gene]`` holding the rank values whose
            geometric mean is taken over the region.
        frac_whole: per-gene reference expression fraction used as divisor.
        adata_X_bool: boolean ``[cell, gene]`` matrix marking expressed genes;
            may be a scipy sparse matrix, a sparse array or a dense array.

    Returns:
        1-D ``float16`` array with one marker score per gene; all zeros when
        the region is empty.
    """
    cell_select_pos = find_neighbors_regional(
        cell_pos, spatial_net_dict, coor_latent, config, cell_annotations
    )
    if len(cell_select_pos) == 0:
        return np.zeros(ranks.shape[1], dtype=np.float16)

    # Ratio of expression ranks
    ranks_tg = ranks[cell_select_pos, :]
    gene_ranks_region = gmean(ranks_tg, axis=0)
    # Values at or below 1 are zeroed so they yield a marker score of 0 below.
    gene_ranks_region[gene_ranks_region <= 1] = 0

    if not config.no_expression_fraction:
        # Ratio of expression fractions.
        # np.asarray(...).ravel() flattens the column sums whether the sum
        # comes back as a numpy.matrix or as a plain ndarray; the former
        # ``.A1`` attribute only exists on numpy.matrix.
        n_expressing = np.asarray(adata_X_bool[cell_select_pos, :].sum(axis=0)).ravel()
        frac_focal = n_expressing / len(cell_select_pos)
        frac_region = frac_focal / frac_whole
        # Binarise: 1 where the regional fraction exceeds the reference, else 0.
        frac_region[frac_region <= 1] = 0
        frac_region[frac_region > 1] = 1

        # Simultaneously consider the ratio of expression fractions and ranks
        gene_ranks_region = gene_ranks_region * frac_region

    mkscore = np.exp(gene_ranks_region**1.5) - 1
    return mkscore.astype(np.float16, copy=False)
|
134
|
+
|
135
|
+
|
136
|
+
def run_latent_to_gene(config: LatentToGeneConfig):
    """
    Compute per-cell gene marker scores from the latent representation and save them.

    Steps, in order:
      1. Load the h5ad file at ``config.hdf5_with_latent_path``.
      2. Drop cells whose ``config.annotation`` value is null (if set).
      3. Map gene names to human symbols with ``config.homolog_file`` (if set).
      4. Build the spatial neighbour graph and read the latent embedding
         ``adata.obsm[config.latent_representation]``.
      5. Optionally load cross-slice geometric means / expression fractions
         from ``config.gM_slices`` and restrict to the shared genes.
      6. Rank genes within every cell, normalise by the geometric mean and
         compute the marker score of every cell.
      7. Drop genes whose name starts with ``MT-``/``mt-`` and write the scores
         to ``config.mkscore_feather_path`` (genes as rows, cells as columns).
      8. Write the (filtered) AnnData object back to
         ``config.hdf5_with_latent_path``.

    Raises:
        ValueError: if the homolog file does not have exactly two columns, or
            fewer than 100 genes remain after the homolog mapping.
    """
    logger.info("------Loading the spatial data...")
    adata = sc.read_h5ad(config.hdf5_with_latent_path)
    logger.info(f"Loaded spatial data with {adata.n_obs} cells and {adata.n_vars} genes.")

    if config.annotation is not None:
        logger.info(f"------Cell annotations are provided as {config.annotation}...")
        initial_cell_count = adata.n_obs
        adata = adata[~pd.isnull(adata.obs[config.annotation]), :]
        logger.info(
            f"Removed null annotations. Cells retained: {adata.n_obs} (initial: {initial_cell_count})."
        )

    # Homologs transformation
    homologs = None  # stays None when no homolog mapping is requested
    if config.homolog_file is not None:
        logger.info(f"------Transforming the {config.species} to HUMAN_GENE_SYM...")
        homologs = pd.read_csv(config.homolog_file, sep="\t")
        if homologs.shape[1] != 2:
            raise ValueError(
                "Homologs file must have two columns: one for the species and one for the human gene symbol."
            )

        homologs.columns = [config.species, "HUMAN_GENE_SYM"]
        homologs.set_index(config.species, inplace=True)
        adata = adata[:, adata.var_names.isin(homologs.index)]
        logger.info(f"{adata.shape[1]} genes retained after homolog transformation.")
        if adata.shape[1] < 100:
            raise ValueError("Too few genes retained in ST data (<100).")
        adata.var_names = homologs.loc[adata.var_names, "HUMAN_GENE_SYM"].values
        adata = adata[:, ~adata.var_names.duplicated()]

    if config.annotation is not None:
        cell_annotations = adata.obs[config.annotation].values
        logger.info(f"Using cell annotations for {len(cell_annotations)} cells.")
    else:
        cell_annotations = None

    # Build the spatial graph
    logger.info("------Building the spatial graph...")
    spatial_net_dict = build_spatial_net(adata, config.annotation, config.num_neighbour_spatial)
    logger.info("Spatial graph built successfully.")

    # Extract the latent representation
    logger.info("------Extracting the latent representation...")
    coor_latent = adata.obsm[config.latent_representation]
    coor_latent = coor_latent.astype(np.float32)
    logger.info("Latent representation extracted.")

    # Geometric mean across slices
    gM = None
    frac_whole = None
    if config.gM_slices is not None:
        logger.info("Geometrical mean across multiple slices is provided.")
        gM_df = pd.read_parquet(config.gM_slices)
        if homologs is not None:
            # Apply exactly the mapping that was applied to ``adata`` above, so
            # both tables end up in the same (human) gene namespace. Reusing
            # the already validated table also avoids re-reading the file.
            gM_df = gM_df.loc[gM_df.index.isin(homologs.index)]
            gM_df.index = homologs.loc[gM_df.index, "HUMAN_GENE_SYM"].values
            # Mirror the de-duplication done on ``adata``: a duplicated symbol
            # would make ``.loc[common_genes]`` return extra rows and break the
            # gene-wise alignment with ``adata``.
            gM_df = gM_df.loc[~gM_df.index.duplicated()]
        common_genes = np.intersect1d(adata.var_names, gM_df.index)
        gM_df = gM_df.loc[common_genes]
        gM = gM_df["G_Mean"].values
        frac_whole = gM_df["frac"].values
        adata = adata[:, common_genes]
        logger.info(
            f"{len(common_genes)} common genes retained after loading the cross slice geometric mean."
        )

    # Compute ranks after taking common genes with gM_slices
    logger.info("------Ranking the spatial data...")
    if not scipy.sparse.issparse(adata.X):
        adata_X = scipy.sparse.csr_matrix(adata.X)
    elif isinstance(adata.X, scipy.sparse.csr_matrix):
        adata_X = adata.X  # Avoid copying if already CSR
    else:
        adata_X = adata.X.tocsr()

    # Create mappings
    n_cells = adata.n_obs
    n_genes = adata.n_vars

    ranks = np.zeros((n_cells, n_genes), dtype=np.float16)
    for i in tqdm(range(n_cells), desc="Computing ranks per cell"):
        data = adata_X[i, :].toarray().flatten()
        ranks[i, :] = rankdata(data, method="average")

    if gM is None:
        # No cross-slice reference supplied: use this dataset's own per-gene
        # geometric mean of the ranks.
        gM = gmean(ranks, axis=0)
        gM = gM.astype(np.float16)

    adata_X_bool = adata_X.astype(bool)
    if frac_whole is None:
        # Compute the fraction of each gene across cells
        frac_whole = np.asarray(adata_X_bool.sum(axis=0)).flatten() / n_cells
        logger.info("Gene expression proportion of each gene across cells computed.")
    else:
        logger.info(
            "Gene expression proportion of each gene across cells in all sections has been provided."
        )

    # Avoid division by zero. A new array is created instead of adding in
    # place so the buffer behind ``gM_df`` is never mutated (it may also be
    # read-only, depending on the pandas configuration).
    frac_whole = frac_whole + 1e-12
    # Normalize the ranks
    ranks /= gM

    logger.info("------Computing marker scores...")
    mk_score = np.zeros((n_cells, n_genes), dtype=np.float16)
    for cell_pos in trange(n_cells, desc="Calculating marker scores"):
        mk_score[cell_pos, :] = compute_regional_mkscore(
            cell_pos,
            spatial_net_dict,
            coor_latent,
            config,
            cell_annotations,
            ranks,
            frac_whole,
            adata_X_bool,
        )

    # Output layout is genes x cells.
    mk_score = mk_score.T
    logger.info("Marker scores computed.")

    # Remove mitochondrial genes
    gene_names = adata.var_names.values.astype(str)
    # True for the genes to KEEP, i.e. names not starting with "MT-" / "mt-".
    mt_gene_mask = ~(np.char.startswith(gene_names, "MT-") | np.char.startswith(gene_names, "mt-"))
    mk_score = mk_score[mt_gene_mask, :]
    gene_names = gene_names[mt_gene_mask]
    logger.info(f"Removed mitochondrial genes. Remaining genes: {len(gene_names)}.")

    # Save the marker scores
    logger.info("------Saving marker scores ...")
    output_file_path = Path(config.mkscore_feather_path)
    output_file_path.parent.mkdir(parents=True, exist_ok=True, mode=0o755)
    mk_score_df = pd.DataFrame(mk_score, index=gene_names, columns=adata.obs_names)
    # Feather does not store the index, so move the gene names into a column.
    mk_score_df.reset_index(inplace=True)
    mk_score_df.rename(columns={"index": "HUMAN_GENE_SYM"}, inplace=True)
    mk_score_df.to_feather(output_file_path)
    logger.info(f"Marker scores saved to {output_file_path}.")

    # Save the modified adata object to disk
    adata.write(config.hdf5_with_latent_path)
    logger.info(f"Modified adata object saved to {config.hdf5_with_latent_path}.")
|
gsMap/main.py
CHANGED
@@ -1,31 +1,40 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
args.
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
1
|
+
import argparse
|
2
|
+
|
3
|
+
from gsMap import __version__
|
4
|
+
from gsMap.config import cli_function_registry
|
5
|
+
|
6
|
+
|
7
|
+
def main():
    """
    Command-line entry point: parse ``sys.argv`` and run the chosen subcommand.

    When no subcommand is given the help text is printed and the process
    terminates with exit status 1. Otherwise the function registered for the
    subcommand (``args.func``) is called with the parsed arguments.

    Raises:
        SystemExit: with code 1 when no subcommand was supplied.
    """
    parser = create_parser()
    args = parser.parse_args()
    if args.subcommand is None:
        parser.print_help()
        # The bare ``exit`` builtin is injected by the ``site`` module for
        # interactive use and is not guaranteed to exist; raising SystemExit
        # has the same effect everywhere.
        raise SystemExit(1)
    args.func(args)
|
14
|
+
|
15
|
+
|
16
|
+
def create_parser():
    """
    Assemble the top-level ``gsMap`` argument parser.

    The parser offers ``--version``/``-v`` and one sub-parser for every entry
    of ``cli_function_registry``. Each entry adds its own arguments through
    ``add_args_function`` and has its ``func`` stored as the ``func`` default,
    while the chosen subcommand name lands in ``args.subcommand``.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    root = argparse.ArgumentParser(
        prog="gsMap",
        description=" gsMap: genetically informed spatial mapping of cells for complex traits",
        formatter_class=argparse.RawTextHelpFormatter,
    )
    root.add_argument(
        "--version", "-v", action="version", version=f"gsMap version {__version__}"
    )

    sub_factory = root.add_subparsers(
        title="Available subcommands", dest="subcommand", help="Subcommands"
    )
    for entry in cli_function_registry.values():
        entry_parser = sub_factory.add_parser(
            entry.name,
            help=entry.description,
            formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        )
        # Let the subcommand declare its own options, then remember which
        # callable has to run when it is selected.
        entry.add_args_function(entry_parser)
        entry_parser.set_defaults(func=entry.func)

    return root
|
37
|
+
|
38
|
+
|
39
|
+
# Run the command-line interface only when this module is executed as a script.
if __name__ == "__main__":
    main()
|