gsMap 1.71.2__py3-none-any.whl → 1.73.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gsMap/GNN/adjacency_matrix.py +25 -27
- gsMap/GNN/model.py +9 -7
- gsMap/GNN/train.py +8 -11
- gsMap/__init__.py +3 -3
- gsMap/__main__.py +3 -2
- gsMap/cauchy_combination_test.py +78 -75
- gsMap/config.py +948 -322
- gsMap/create_slice_mean.py +168 -0
- gsMap/diagnosis.py +179 -101
- gsMap/find_latent_representation.py +29 -27
- gsMap/format_sumstats.py +239 -201
- gsMap/generate_ldscore.py +334 -222
- gsMap/latent_to_gene.py +128 -68
- gsMap/main.py +23 -14
- gsMap/report.py +39 -25
- gsMap/run_all_mode.py +87 -46
- gsMap/setup.py +1 -1
- gsMap/spatial_ldsc_multiple_sumstats.py +154 -80
- gsMap/utils/generate_r2_matrix.py +100 -346
- gsMap/utils/jackknife.py +84 -80
- gsMap/utils/manhattan_plot.py +180 -207
- gsMap/utils/regression_read.py +83 -176
- gsMap/visualize.py +82 -64
- gsmap-1.73.0.dist-info/METADATA +169 -0
- gsmap-1.73.0.dist-info/RECORD +31 -0
- {gsmap-1.71.2.dist-info → gsmap-1.73.0.dist-info}/WHEEL +1 -1
- {gsmap-1.71.2.dist-info → gsmap-1.73.0.dist-info/licenses}/LICENSE +6 -6
- gsMap/utils/make_annotations.py +0 -518
- gsmap-1.71.2.dist-info/METADATA +0 -105
- gsmap-1.71.2.dist-info/RECORD +0 -31
- {gsmap-1.71.2.dist-info → gsmap-1.73.0.dist-info}/entry_points.txt +0 -0
gsMap/latent_to_gene.py
CHANGED
@@ -5,11 +5,10 @@ import numpy as np
|
|
5
5
|
import pandas as pd
|
6
6
|
import scanpy as sc
|
7
7
|
import scipy
|
8
|
-
from scipy.stats import gmean
|
9
|
-
from scipy.stats import rankdata
|
8
|
+
from scipy.stats import gmean, rankdata
|
10
9
|
from sklearn.metrics.pairwise import cosine_similarity
|
11
10
|
from sklearn.neighbors import NearestNeighbors
|
12
|
-
from tqdm import tqdm
|
11
|
+
from tqdm import tqdm, trange
|
13
12
|
|
14
13
|
from gsMap.config import LatentToGeneConfig
|
15
14
|
|
@@ -26,7 +25,7 @@ def find_neighbors(coor, num_neighbour):
|
|
26
25
|
cell1 = np.repeat(cell_indices, indices.shape[1])
|
27
26
|
cell2 = indices.flatten()
|
28
27
|
distance = distances.flatten()
|
29
|
-
spatial_net = pd.DataFrame({
|
28
|
+
spatial_net = pd.DataFrame({"Cell1": cell1, "Cell2": cell2, "Distance": distance})
|
30
29
|
return spatial_net
|
31
30
|
|
32
31
|
|
@@ -34,11 +33,11 @@ def build_spatial_net(adata, annotation, num_neighbour):
|
|
34
33
|
"""
|
35
34
|
Build spatial neighbourhood matrix for each spot (cell) based on the spatial coordinates.
|
36
35
|
"""
|
37
|
-
logger.info(
|
36
|
+
logger.info("------Building spatial graph based on spatial coordinates...")
|
38
37
|
|
39
|
-
coor = adata.obsm[
|
38
|
+
coor = adata.obsm["spatial"]
|
40
39
|
if annotation is not None:
|
41
|
-
logger.info(
|
40
|
+
logger.info("Cell annotations are provided...")
|
42
41
|
spatial_net_list = []
|
43
42
|
# Cells with annotations
|
44
43
|
for ct in adata.obs[annotation].dropna().unique():
|
@@ -46,24 +45,24 @@ def build_spatial_net(adata, annotation, num_neighbour):
|
|
46
45
|
coor_temp = coor[idx, :]
|
47
46
|
spatial_net_temp = find_neighbors(coor_temp, min(num_neighbour, coor_temp.shape[0]))
|
48
47
|
# Map back to original indices
|
49
|
-
spatial_net_temp[
|
50
|
-
spatial_net_temp[
|
48
|
+
spatial_net_temp["Cell1"] = idx[spatial_net_temp["Cell1"].values]
|
49
|
+
spatial_net_temp["Cell2"] = idx[spatial_net_temp["Cell2"].values]
|
51
50
|
spatial_net_list.append(spatial_net_temp)
|
52
|
-
logger.info(f
|
51
|
+
logger.info(f"{ct}: {coor_temp.shape[0]} cells")
|
53
52
|
|
54
53
|
# Cells labeled as nan
|
55
54
|
if pd.isnull(adata.obs[annotation]).any():
|
56
55
|
idx_nan = np.where(pd.isnull(adata.obs[annotation]))[0]
|
57
|
-
logger.info(f
|
56
|
+
logger.info(f"Nan: {len(idx_nan)} cells")
|
58
57
|
spatial_net_temp = find_neighbors(coor, num_neighbour)
|
59
|
-
spatial_net_temp = spatial_net_temp[spatial_net_temp[
|
58
|
+
spatial_net_temp = spatial_net_temp[spatial_net_temp["Cell1"].isin(idx_nan)]
|
60
59
|
spatial_net_list.append(spatial_net_temp)
|
61
60
|
spatial_net = pd.concat(spatial_net_list, axis=0)
|
62
61
|
else:
|
63
|
-
logger.info(
|
62
|
+
logger.info("Cell annotations are not provided...")
|
64
63
|
spatial_net = find_neighbors(coor, num_neighbour)
|
65
64
|
|
66
|
-
return spatial_net.groupby(
|
65
|
+
return spatial_net.groupby("Cell1")["Cell2"].apply(np.array).to_dict()
|
67
66
|
|
68
67
|
|
69
68
|
def find_neighbors_regional(cell_pos, spatial_net_dict, coor_latent, config, cell_annotations):
|
@@ -96,8 +95,16 @@ def find_neighbors_regional(cell_pos, spatial_net_dict, coor_latent, config, cel
|
|
96
95
|
return cell_select_pos
|
97
96
|
|
98
97
|
|
99
|
-
def compute_regional_mkscore(
|
100
|
-
|
98
|
+
def compute_regional_mkscore(
|
99
|
+
cell_pos,
|
100
|
+
spatial_net_dict,
|
101
|
+
coor_latent,
|
102
|
+
config,
|
103
|
+
cell_annotations,
|
104
|
+
ranks,
|
105
|
+
frac_whole,
|
106
|
+
adata_X_bool,
|
107
|
+
):
|
101
108
|
"""
|
102
109
|
Compute gmean ranks of a region.
|
103
110
|
"""
|
@@ -122,79 +129,113 @@ def compute_regional_mkscore(cell_pos, spatial_net_dict, coor_latent, config, ce
|
|
122
129
|
# Simultaneously consider the ratio of expression fractions and ranks
|
123
130
|
gene_ranks_region = gene_ranks_region * frac_region
|
124
131
|
|
125
|
-
mkscore = np.exp(gene_ranks_region
|
132
|
+
mkscore = np.exp(gene_ranks_region**1.5) - 1
|
126
133
|
return mkscore.astype(np.float16, copy=False)
|
127
134
|
|
128
135
|
|
129
136
|
def run_latent_to_gene(config: LatentToGeneConfig):
|
130
|
-
logger.info(
|
137
|
+
logger.info("------Loading the spatial data...")
|
131
138
|
adata = sc.read_h5ad(config.hdf5_with_latent_path)
|
132
|
-
logger.info(f
|
139
|
+
logger.info(f"Loaded spatial data with {adata.n_obs} cells and {adata.n_vars} genes.")
|
133
140
|
|
134
141
|
if config.annotation is not None:
|
135
|
-
logger.info(f
|
142
|
+
logger.info(f"------Cell annotations are provided as {config.annotation}...")
|
136
143
|
initial_cell_count = adata.n_obs
|
137
144
|
adata = adata[~pd.isnull(adata.obs[config.annotation]), :]
|
138
|
-
logger.info(
|
145
|
+
logger.info(
|
146
|
+
f"Removed null annotations. Cells retained: {adata.n_obs} (initial: {initial_cell_count})."
|
147
|
+
)
|
139
148
|
|
140
149
|
# Homologs transformation
|
141
|
-
if config.homolog_file is not None:
|
142
|
-
|
143
|
-
|
144
|
-
if
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
150
|
+
if config.homolog_file is not None and config.species is not None:
|
151
|
+
species_col_name = f"{config.species}_homolog"
|
152
|
+
|
153
|
+
# Check if homolog conversion has already been performed
|
154
|
+
if species_col_name in adata.var.columns:
|
155
|
+
logger.warning(
|
156
|
+
f"Column '{species_col_name}' already exists in adata.var. "
|
157
|
+
f"It appears gene names have already been converted to human gene symbols. "
|
158
|
+
f"Skipping homolog transformation."
|
159
|
+
)
|
160
|
+
else:
|
161
|
+
logger.info(f"------Transforming the {config.species} to HUMAN_GENE_SYM...")
|
162
|
+
homologs = pd.read_csv(config.homolog_file, sep="\t")
|
163
|
+
if homologs.shape[1] != 2:
|
164
|
+
raise ValueError(
|
165
|
+
"Homologs file must have two columns: one for the species and one for the human gene symbol."
|
166
|
+
)
|
167
|
+
|
168
|
+
homologs.columns = [config.species, "HUMAN_GENE_SYM"]
|
169
|
+
homologs.set_index(config.species, inplace=True)
|
155
170
|
|
156
|
-
|
157
|
-
|
158
|
-
|
171
|
+
# original_gene_names = adata.var_names.copy()
|
172
|
+
|
173
|
+
# Filter genes present in homolog file
|
174
|
+
adata = adata[:, adata.var_names.isin(homologs.index)]
|
175
|
+
logger.info(f"{adata.shape[1]} genes retained after homolog transformation.")
|
176
|
+
if adata.shape[1] < 100:
|
177
|
+
raise ValueError("Too few genes retained in ST data (<100).")
|
178
|
+
|
179
|
+
# Create mapping table of original to human gene names
|
180
|
+
gene_mapping = pd.Series(
|
181
|
+
homologs.loc[adata.var_names, "HUMAN_GENE_SYM"].values, index=adata.var_names
|
182
|
+
)
|
183
|
+
|
184
|
+
# Store original species gene names in var dataframe with the suffixed column name
|
185
|
+
adata.var[species_col_name] = adata.var_names.values
|
186
|
+
|
187
|
+
# Convert var_names to human gene symbols
|
188
|
+
adata.var_names = gene_mapping.values
|
189
|
+
adata.var.index.name = "HUMAN_GENE_SYM"
|
190
|
+
|
191
|
+
# Remove duplicated genes after conversion
|
192
|
+
adata = adata[:, ~adata.var_names.duplicated()]
|
193
|
+
logger.info(f"{adata.shape[1]} genes retained after removing duplicates.")
|
159
194
|
|
160
195
|
if config.annotation is not None:
|
161
196
|
cell_annotations = adata.obs[config.annotation].values
|
162
|
-
logger.info(f
|
197
|
+
logger.info(f"Using cell annotations for {len(cell_annotations)} cells.")
|
163
198
|
else:
|
164
199
|
cell_annotations = None
|
165
200
|
|
166
201
|
# Build the spatial graph
|
167
|
-
logger.info(
|
202
|
+
logger.info("------Building the spatial graph...")
|
168
203
|
spatial_net_dict = build_spatial_net(adata, config.annotation, config.num_neighbour_spatial)
|
169
|
-
logger.info(
|
204
|
+
logger.info("Spatial graph built successfully.")
|
170
205
|
|
171
206
|
# Extract the latent representation
|
172
|
-
logger.info(
|
207
|
+
logger.info("------Extracting the latent representation...")
|
173
208
|
coor_latent = adata.obsm[config.latent_representation]
|
174
209
|
coor_latent = coor_latent.astype(np.float32)
|
175
|
-
logger.info(
|
210
|
+
logger.info("Latent representation extracted.")
|
176
211
|
|
177
212
|
# Geometric mean across slices
|
178
213
|
gM = None
|
214
|
+
frac_whole = None
|
179
215
|
if config.gM_slices is not None:
|
180
|
-
logger.info(
|
216
|
+
logger.info("Geometrical mean across multiple slices is provided.")
|
181
217
|
gM_df = pd.read_parquet(config.gM_slices)
|
182
218
|
if config.species is not None:
|
183
|
-
homologs = pd.read_csv(config.homolog_file, sep=
|
219
|
+
homologs = pd.read_csv(config.homolog_file, sep="\t")
|
184
220
|
if homologs.shape[1] < 2:
|
185
|
-
raise ValueError(
|
186
|
-
|
221
|
+
raise ValueError(
|
222
|
+
"Homologs file must have at least two columns: one for the species and one for the human gene symbol."
|
223
|
+
)
|
224
|
+
homologs.columns = [config.species, "HUMAN_GENE_SYM"]
|
187
225
|
homologs.set_index(config.species, inplace=True)
|
188
226
|
gM_df = gM_df.loc[gM_df.index.isin(homologs.index)]
|
189
|
-
gM_df.index = homologs.loc[gM_df.index,
|
227
|
+
gM_df.index = homologs.loc[gM_df.index, "HUMAN_GENE_SYM"].values
|
190
228
|
common_genes = np.intersect1d(adata.var_names, gM_df.index)
|
191
229
|
gM_df = gM_df.loc[common_genes]
|
192
|
-
gM = gM_df[
|
230
|
+
gM = gM_df["G_Mean"].values
|
231
|
+
frac_whole = gM_df["frac"].values
|
193
232
|
adata = adata[:, common_genes]
|
194
|
-
logger.info(
|
233
|
+
logger.info(
|
234
|
+
f"{len(common_genes)} common genes retained after loading the cross slice geometric mean."
|
235
|
+
)
|
195
236
|
|
196
237
|
# Compute ranks after taking common genes with gM_slices
|
197
|
-
logger.info(
|
238
|
+
logger.info("------Ranking the spatial data...")
|
198
239
|
if not scipy.sparse.issparse(adata.X):
|
199
240
|
adata_X = scipy.sparse.csr_matrix(adata.X)
|
200
241
|
elif isinstance(adata.X, scipy.sparse.csr_matrix):
|
@@ -202,51 +243,70 @@ def run_latent_to_gene(config: LatentToGeneConfig):
|
|
202
243
|
else:
|
203
244
|
adata_X = adata.X.tocsr()
|
204
245
|
|
205
|
-
|
246
|
+
# Create mappings
|
247
|
+
n_cells = adata.n_obs
|
248
|
+
n_genes = adata.n_vars
|
206
249
|
|
250
|
+
ranks = np.zeros((n_cells, adata.n_vars), dtype=np.float16)
|
207
251
|
for i in tqdm(range(n_cells), desc="Computing ranks per cell"):
|
208
252
|
data = adata_X[i, :].toarray().flatten()
|
209
|
-
ranks[i, :] = rankdata(data, method=
|
253
|
+
ranks[i, :] = rankdata(data, method="average")
|
210
254
|
|
211
255
|
if gM is None:
|
212
256
|
gM = gmean(ranks, axis=0)
|
257
|
+
gM = gM.astype(np.float16)
|
213
258
|
|
214
|
-
# Compute the fraction of each gene across cells
|
215
259
|
adata_X_bool = adata_X.astype(bool)
|
216
|
-
frac_whole
|
217
|
-
|
260
|
+
if frac_whole is None:
|
261
|
+
# Compute the fraction of each gene across cells
|
262
|
+
frac_whole = np.asarray(adata_X_bool.sum(axis=0)).flatten() / n_cells
|
263
|
+
logger.info("Gene expression proportion of each gene across cells computed.")
|
264
|
+
else:
|
265
|
+
logger.info(
|
266
|
+
"Gene expression proportion of each gene across cells in all sections has been provided."
|
267
|
+
)
|
218
268
|
|
269
|
+
frac_whole += 1e-12 # Avoid division by zero
|
219
270
|
# Normalize the ranks
|
220
271
|
ranks /= gM
|
221
272
|
|
222
|
-
# Compute marker scores in parallel
|
223
|
-
logger.info('------Computing marker scores...')
|
224
273
|
def compute_mk_score_wrapper(cell_pos):
|
225
274
|
return compute_regional_mkscore(
|
226
|
-
cell_pos,
|
275
|
+
cell_pos,
|
276
|
+
spatial_net_dict,
|
277
|
+
coor_latent,
|
278
|
+
config,
|
279
|
+
cell_annotations,
|
280
|
+
ranks,
|
281
|
+
frac_whole,
|
282
|
+
adata_X_bool,
|
227
283
|
)
|
228
284
|
|
229
|
-
|
230
|
-
mk_score = np.
|
231
|
-
|
285
|
+
logger.info("------Computing marker scores...")
|
286
|
+
mk_score = np.zeros((n_cells, n_genes), dtype=np.float16)
|
287
|
+
for cell_pos in trange(n_cells, desc="Calculating marker scores"):
|
288
|
+
mk_score[cell_pos, :] = compute_mk_score_wrapper(cell_pos)
|
289
|
+
|
290
|
+
mk_score = mk_score.T
|
291
|
+
logger.info("Marker scores computed.")
|
232
292
|
|
233
293
|
# Remove mitochondrial genes
|
234
294
|
gene_names = adata.var_names.values.astype(str)
|
235
|
-
mt_gene_mask = ~(np.char.startswith(gene_names,
|
295
|
+
mt_gene_mask = ~(np.char.startswith(gene_names, "MT-") | np.char.startswith(gene_names, "mt-"))
|
236
296
|
mk_score = mk_score[mt_gene_mask, :]
|
237
297
|
gene_names = gene_names[mt_gene_mask]
|
238
|
-
logger.info(f
|
298
|
+
logger.info(f"Removed mitochondrial genes. Remaining genes: {len(gene_names)}.")
|
239
299
|
|
240
300
|
# Save the marker scores
|
241
|
-
logger.info(
|
301
|
+
logger.info("------Saving marker scores ...")
|
242
302
|
output_file_path = Path(config.mkscore_feather_path)
|
243
303
|
output_file_path.parent.mkdir(parents=True, exist_ok=True, mode=0o755)
|
244
304
|
mk_score_df = pd.DataFrame(mk_score, index=gene_names, columns=adata.obs_names)
|
245
305
|
mk_score_df.reset_index(inplace=True)
|
246
|
-
mk_score_df.rename(columns={
|
306
|
+
mk_score_df.rename(columns={"index": "HUMAN_GENE_SYM"}, inplace=True)
|
247
307
|
mk_score_df.to_feather(output_file_path)
|
248
|
-
logger.info(f
|
308
|
+
logger.info(f"Marker scores saved to {output_file_path}.")
|
249
309
|
|
250
310
|
# Save the modified adata object to disk
|
251
311
|
adata.write(config.hdf5_with_latent_path)
|
252
|
-
logger.info(f
|
312
|
+
logger.info(f"Modified adata object saved to {config.hdf5_with_latent_path}.")
|
gsMap/main.py
CHANGED
@@ -1,5 +1,8 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
import argparse
|
2
|
+
|
3
|
+
from gsMap import __version__
|
4
|
+
from gsMap.config import cli_function_registry
|
5
|
+
|
3
6
|
|
4
7
|
def main():
|
5
8
|
parser = create_parser()
|
@@ -7,21 +10,27 @@ def main():
|
|
7
10
|
if args.subcommand is None:
|
8
11
|
parser.print_help()
|
9
12
|
exit(1)
|
10
|
-
args.func(
|
11
|
-
|
12
|
-
)
|
13
|
+
args.func(args)
|
14
|
+
|
13
15
|
|
14
16
|
def create_parser():
|
15
|
-
parser = argparse.ArgumentParser(
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
17
|
+
parser = argparse.ArgumentParser(
|
18
|
+
description=" gsMap: genetically informed spatial mapping of cells for complex traits",
|
19
|
+
formatter_class=argparse.RawTextHelpFormatter,
|
20
|
+
prog="gsMap",
|
21
|
+
)
|
22
|
+
parser.add_argument(
|
23
|
+
"--version", "-v", action="version", version=f"gsMap version {__version__}"
|
24
|
+
)
|
25
|
+
subparsers = parser.add_subparsers(
|
26
|
+
dest="subcommand", help="Subcommands", title="Available subcommands"
|
27
|
+
)
|
21
28
|
for subcommand in cli_function_registry.values():
|
22
|
-
subcommand_parser = subparsers.add_parser(
|
23
|
-
|
24
|
-
|
29
|
+
subcommand_parser = subparsers.add_parser(
|
30
|
+
subcommand.name,
|
31
|
+
help=subcommand.description,
|
32
|
+
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
33
|
+
)
|
25
34
|
subcommand.add_args_function(subcommand_parser)
|
26
35
|
subcommand_parser.set_defaults(func=subcommand.func)
|
27
36
|
return parser
|
gsMap/report.py
CHANGED
@@ -16,16 +16,17 @@ logger = logging.getLogger(__name__)
|
|
16
16
|
try:
|
17
17
|
from importlib.resources import files
|
18
18
|
|
19
|
-
template_dir = files(
|
19
|
+
template_dir = files("gsMap").joinpath("templates")
|
20
20
|
except (ImportError, FileNotFoundError):
|
21
21
|
# Fallback to a relative path if running in development mode
|
22
|
-
template_dir = os.path.join(os.path.dirname(__file__),
|
22
|
+
template_dir = os.path.join(os.path.dirname(__file__), "templates")
|
23
23
|
|
24
24
|
# Set up Jinja2 environment
|
25
25
|
env = Environment(loader=FileSystemLoader(template_dir))
|
26
26
|
|
27
27
|
# Load the template
|
28
|
-
template = env.get_template(
|
28
|
+
template = env.get_template("report_template.html")
|
29
|
+
|
29
30
|
|
30
31
|
def copy_files_to_report_dir(result_dir, report_dir, files_to_copy):
|
31
32
|
"""Copy specified files (HTML or PNG) to the report directory."""
|
@@ -36,28 +37,30 @@ def copy_files_to_report_dir(result_dir, report_dir, files_to_copy):
|
|
36
37
|
|
37
38
|
def load_cauchy_table(csv_file):
|
38
39
|
"""Load the Cauchy combination table from a compressed CSV file using Pandas."""
|
39
|
-
df = pd.read_csv(csv_file, compression=
|
40
|
-
table_data = df[[
|
40
|
+
df = pd.read_csv(csv_file, compression="gzip")
|
41
|
+
table_data = df[["annotation", "p_cauchy", "p_median"]].to_dict(orient="records")
|
41
42
|
return table_data
|
42
43
|
|
43
44
|
|
44
45
|
def load_gene_diagnostic_info(csv_file):
|
45
46
|
"""Load the Gene Diagnostic Info CSV file and return the top 50 rows."""
|
46
47
|
df = pd.read_csv(csv_file)
|
47
|
-
top_50 = df.head(50).to_dict(orient=
|
48
|
+
top_50 = df.head(50).to_dict(orient="records")
|
48
49
|
return top_50
|
49
50
|
|
50
51
|
|
51
52
|
def embed_html_content(file_path):
|
52
53
|
"""Read the content of an HTML file and return it as a string."""
|
53
|
-
with open(file_path
|
54
|
+
with open(file_path) as f:
|
54
55
|
return f.read()
|
55
56
|
|
57
|
+
|
56
58
|
def check_and_run_cauchy_combination(config):
|
57
59
|
cauchy_result_file = config.get_cauchy_result_file(config.trait_name)
|
58
60
|
if cauchy_result_file.exists():
|
59
61
|
logger.info(
|
60
|
-
f"Cauchy combination already done for trait {config.trait_name}. Results saved at {cauchy_result_file}. Skipping..."
|
62
|
+
f"Cauchy combination already done for trait {config.trait_name}. Results saved at {cauchy_result_file}. Skipping..."
|
63
|
+
)
|
61
64
|
else:
|
62
65
|
logger.info(f"Running Cauchy combination for trait {config.trait_name}...")
|
63
66
|
cauchy_config = CauchyCombinationConfig(
|
@@ -68,16 +71,16 @@ def check_and_run_cauchy_combination(config):
|
|
68
71
|
)
|
69
72
|
run_Cauchy_combination(cauchy_config)
|
70
73
|
|
71
|
-
df = pd.read_csv(cauchy_result_file, compression=
|
72
|
-
table_data = df[[
|
74
|
+
df = pd.read_csv(cauchy_result_file, compression="gzip")
|
75
|
+
table_data = df[["annotation", "p_cauchy", "p_median"]].to_dict(orient="records")
|
73
76
|
|
74
77
|
return table_data
|
75
78
|
|
76
|
-
def run_report(config: ReportConfig, run_parameters=None):
|
77
79
|
|
78
|
-
|
80
|
+
def run_report(config: ReportConfig, run_parameters=None):
|
81
|
+
logger.info("Running gsMap Diagnosis Module")
|
79
82
|
run_Diagnosis(config)
|
80
|
-
logger.info(
|
83
|
+
logger.info("gsMap Diagnosis running successfully")
|
81
84
|
|
82
85
|
report_dir = config.get_report_dir(config.trait_name)
|
83
86
|
gene_diagnostic_info_file = config.get_gene_diagnostic_info_save_path(config.trait_name)
|
@@ -90,19 +93,27 @@ def run_report(config: ReportConfig, run_parameters=None):
|
|
90
93
|
gss_distribution_dir = config.get_GSS_plot_dir(config.trait_name)
|
91
94
|
|
92
95
|
gene_plots = []
|
93
|
-
plot_select_gene_list =
|
96
|
+
plot_select_gene_list = (
|
97
|
+
config.get_GSS_plot_select_gene_file(config.trait_name).read_text().splitlines()
|
98
|
+
)
|
94
99
|
for gene_name in plot_select_gene_list:
|
95
|
-
expression_png =
|
100
|
+
expression_png = (
|
101
|
+
gss_distribution_dir / f"{config.sample_name}_{gene_name}_Expression_Distribution.png"
|
102
|
+
)
|
96
103
|
gss_png = gss_distribution_dir / f"{config.sample_name}_{gene_name}_GSS_Distribution.png"
|
97
104
|
# check if expression and GSS plots exist
|
98
105
|
if not os.path.exists(expression_png) or not os.path.exists(gss_png):
|
99
106
|
print(f"Skipping gene {gene_name} as expression or GSS plot is missing.")
|
100
107
|
continue
|
101
|
-
gene_plots.append(
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
108
|
+
gene_plots.append(
|
109
|
+
{
|
110
|
+
"name": gene_name,
|
111
|
+
"expression_plot": expression_png.relative_to(
|
112
|
+
report_dir
|
113
|
+
), # Path for gene expression plot
|
114
|
+
"gss_plot": gss_png.relative_to(report_dir), # Path for GSS distribution plot
|
115
|
+
}
|
116
|
+
)
|
106
117
|
|
107
118
|
# # Copy PNG files to the report directory
|
108
119
|
# copy_files_to_report_dir(result_dir, report_dir, [gene['expression_plot'] for gene in gene_plots] + [gene['gss_plot'] for gene in gene_plots])
|
@@ -115,7 +126,9 @@ def run_report(config: ReportConfig, run_parameters=None):
|
|
115
126
|
# Sample data for other report components
|
116
127
|
title = f"{config.sample_name} Genetic Spatial Mapping Report"
|
117
128
|
|
118
|
-
genetic_mapping_plot = embed_html_content(
|
129
|
+
genetic_mapping_plot = embed_html_content(
|
130
|
+
config.get_gsMap_html_plot_save_path(config.trait_name)
|
131
|
+
)
|
119
132
|
manhattan_plot = embed_html_content(config.get_manhattan_html_plot_path(config.trait_name))
|
120
133
|
|
121
134
|
gsmap_version = gsMap.__version__
|
@@ -133,13 +146,12 @@ def run_report(config: ReportConfig, run_parameters=None):
|
|
133
146
|
"Report Directory": config.get_report_dir(trait_name),
|
134
147
|
"gsMap Report File": config.get_gsMap_report_file(trait_name),
|
135
148
|
"Gene Diagnostic Info File": config.get_gene_diagnostic_info_save_path(trait_name),
|
136
|
-
"Report Generation Date": pd.Timestamp.now().strftime(
|
149
|
+
"Report Generation Date": pd.Timestamp.now().strftime("%Y-%m-%d %H:%M:%S"),
|
137
150
|
}
|
138
151
|
|
139
152
|
if run_parameters is not None:
|
140
153
|
default_run_parameters.update(run_parameters)
|
141
154
|
|
142
|
-
|
143
155
|
output_html = template.render(
|
144
156
|
title=title,
|
145
157
|
genetic_mapping_plot=genetic_mapping_plot, # Inlined genetic mapping plot
|
@@ -148,7 +160,7 @@ def run_report(config: ReportConfig, run_parameters=None):
|
|
148
160
|
gene_plots=gene_plots, # List of PNG paths for gene plots
|
149
161
|
gsmap_version=gsmap_version,
|
150
162
|
parameters=default_run_parameters, # Pass the run parameters to the template
|
151
|
-
gene_diagnostic_info=gene_diagnostic_info # Include top 50 gene diagnostic info rows
|
163
|
+
gene_diagnostic_info=gene_diagnostic_info, # Include top 50 gene diagnostic info rows
|
152
164
|
)
|
153
165
|
|
154
166
|
# Save the generated HTML report in the 'report' directory
|
@@ -157,4 +169,6 @@ def run_report(config: ReportConfig, run_parameters=None):
|
|
157
169
|
f.write(output_html)
|
158
170
|
|
159
171
|
logger.info(f"Report generated successfully! Saved at {report_file}.")
|
160
|
-
logger.info(
|
172
|
+
logger.info(
|
173
|
+
"Copy the report directory to your local PC and open the HTML report file in a web browser to view the report."
|
174
|
+
)
|