gsMap 1.71.2__py3-none-any.whl → 1.72.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gsMap/GNN/adjacency_matrix.py +25 -27
- gsMap/GNN/model.py +9 -7
- gsMap/GNN/train.py +8 -11
- gsMap/__init__.py +3 -3
- gsMap/__main__.py +3 -2
- gsMap/cauchy_combination_test.py +75 -72
- gsMap/config.py +822 -316
- gsMap/create_slice_mean.py +154 -0
- gsMap/diagnosis.py +179 -101
- gsMap/find_latent_representation.py +28 -26
- gsMap/format_sumstats.py +233 -201
- gsMap/generate_ldscore.py +353 -209
- gsMap/latent_to_gene.py +92 -60
- gsMap/main.py +23 -14
- gsMap/report.py +39 -25
- gsMap/run_all_mode.py +86 -46
- gsMap/setup.py +1 -1
- gsMap/spatial_ldsc_multiple_sumstats.py +154 -80
- gsMap/utils/generate_r2_matrix.py +173 -140
- gsMap/utils/jackknife.py +84 -80
- gsMap/utils/manhattan_plot.py +180 -207
- gsMap/utils/regression_read.py +105 -122
- gsMap/visualize.py +82 -64
- {gsmap-1.71.2.dist-info → gsmap-1.72.3.dist-info}/METADATA +21 -6
- gsmap-1.72.3.dist-info/RECORD +31 -0
- {gsmap-1.71.2.dist-info → gsmap-1.72.3.dist-info}/WHEEL +1 -1
- gsMap/utils/make_annotations.py +0 -518
- gsmap-1.71.2.dist-info/RECORD +0 -31
- {gsmap-1.71.2.dist-info → gsmap-1.72.3.dist-info}/LICENSE +0 -0
- {gsmap-1.71.2.dist-info → gsmap-1.72.3.dist-info}/entry_points.txt +0 -0
gsMap/latent_to_gene.py
CHANGED
@@ -5,11 +5,10 @@ import numpy as np
|
|
5
5
|
import pandas as pd
|
6
6
|
import scanpy as sc
|
7
7
|
import scipy
|
8
|
-
from scipy.stats import gmean
|
9
|
-
from scipy.stats import rankdata
|
8
|
+
from scipy.stats import gmean, rankdata
|
10
9
|
from sklearn.metrics.pairwise import cosine_similarity
|
11
10
|
from sklearn.neighbors import NearestNeighbors
|
12
|
-
from tqdm import tqdm
|
11
|
+
from tqdm import tqdm, trange
|
13
12
|
|
14
13
|
from gsMap.config import LatentToGeneConfig
|
15
14
|
|
@@ -26,7 +25,7 @@ def find_neighbors(coor, num_neighbour):
|
|
26
25
|
cell1 = np.repeat(cell_indices, indices.shape[1])
|
27
26
|
cell2 = indices.flatten()
|
28
27
|
distance = distances.flatten()
|
29
|
-
spatial_net = pd.DataFrame({
|
28
|
+
spatial_net = pd.DataFrame({"Cell1": cell1, "Cell2": cell2, "Distance": distance})
|
30
29
|
return spatial_net
|
31
30
|
|
32
31
|
|
@@ -34,11 +33,11 @@ def build_spatial_net(adata, annotation, num_neighbour):
|
|
34
33
|
"""
|
35
34
|
Build spatial neighbourhood matrix for each spot (cell) based on the spatial coordinates.
|
36
35
|
"""
|
37
|
-
logger.info(
|
36
|
+
logger.info("------Building spatial graph based on spatial coordinates...")
|
38
37
|
|
39
|
-
coor = adata.obsm[
|
38
|
+
coor = adata.obsm["spatial"]
|
40
39
|
if annotation is not None:
|
41
|
-
logger.info(
|
40
|
+
logger.info("Cell annotations are provided...")
|
42
41
|
spatial_net_list = []
|
43
42
|
# Cells with annotations
|
44
43
|
for ct in adata.obs[annotation].dropna().unique():
|
@@ -46,24 +45,24 @@ def build_spatial_net(adata, annotation, num_neighbour):
|
|
46
45
|
coor_temp = coor[idx, :]
|
47
46
|
spatial_net_temp = find_neighbors(coor_temp, min(num_neighbour, coor_temp.shape[0]))
|
48
47
|
# Map back to original indices
|
49
|
-
spatial_net_temp[
|
50
|
-
spatial_net_temp[
|
48
|
+
spatial_net_temp["Cell1"] = idx[spatial_net_temp["Cell1"].values]
|
49
|
+
spatial_net_temp["Cell2"] = idx[spatial_net_temp["Cell2"].values]
|
51
50
|
spatial_net_list.append(spatial_net_temp)
|
52
|
-
logger.info(f
|
51
|
+
logger.info(f"{ct}: {coor_temp.shape[0]} cells")
|
53
52
|
|
54
53
|
# Cells labeled as nan
|
55
54
|
if pd.isnull(adata.obs[annotation]).any():
|
56
55
|
idx_nan = np.where(pd.isnull(adata.obs[annotation]))[0]
|
57
|
-
logger.info(f
|
56
|
+
logger.info(f"Nan: {len(idx_nan)} cells")
|
58
57
|
spatial_net_temp = find_neighbors(coor, num_neighbour)
|
59
|
-
spatial_net_temp = spatial_net_temp[spatial_net_temp[
|
58
|
+
spatial_net_temp = spatial_net_temp[spatial_net_temp["Cell1"].isin(idx_nan)]
|
60
59
|
spatial_net_list.append(spatial_net_temp)
|
61
60
|
spatial_net = pd.concat(spatial_net_list, axis=0)
|
62
61
|
else:
|
63
|
-
logger.info(
|
62
|
+
logger.info("Cell annotations are not provided...")
|
64
63
|
spatial_net = find_neighbors(coor, num_neighbour)
|
65
64
|
|
66
|
-
return spatial_net.groupby(
|
65
|
+
return spatial_net.groupby("Cell1")["Cell2"].apply(np.array).to_dict()
|
67
66
|
|
68
67
|
|
69
68
|
def find_neighbors_regional(cell_pos, spatial_net_dict, coor_latent, config, cell_annotations):
|
@@ -96,8 +95,16 @@ def find_neighbors_regional(cell_pos, spatial_net_dict, coor_latent, config, cel
|
|
96
95
|
return cell_select_pos
|
97
96
|
|
98
97
|
|
99
|
-
def compute_regional_mkscore(
|
100
|
-
|
98
|
+
def compute_regional_mkscore(
|
99
|
+
cell_pos,
|
100
|
+
spatial_net_dict,
|
101
|
+
coor_latent,
|
102
|
+
config,
|
103
|
+
cell_annotations,
|
104
|
+
ranks,
|
105
|
+
frac_whole,
|
106
|
+
adata_X_bool,
|
107
|
+
):
|
101
108
|
"""
|
102
109
|
Compute gmean ranks of a region.
|
103
110
|
"""
|
@@ -122,79 +129,85 @@ def compute_regional_mkscore(cell_pos, spatial_net_dict, coor_latent, config, ce
|
|
122
129
|
# Simultaneously consider the ratio of expression fractions and ranks
|
123
130
|
gene_ranks_region = gene_ranks_region * frac_region
|
124
131
|
|
125
|
-
mkscore = np.exp(gene_ranks_region
|
132
|
+
mkscore = np.exp(gene_ranks_region**1.5) - 1
|
126
133
|
return mkscore.astype(np.float16, copy=False)
|
127
134
|
|
128
135
|
|
129
136
|
def run_latent_to_gene(config: LatentToGeneConfig):
|
130
|
-
logger.info(
|
137
|
+
logger.info("------Loading the spatial data...")
|
131
138
|
adata = sc.read_h5ad(config.hdf5_with_latent_path)
|
132
|
-
logger.info(f
|
139
|
+
logger.info(f"Loaded spatial data with {adata.n_obs} cells and {adata.n_vars} genes.")
|
133
140
|
|
134
141
|
if config.annotation is not None:
|
135
|
-
logger.info(f
|
142
|
+
logger.info(f"------Cell annotations are provided as {config.annotation}...")
|
136
143
|
initial_cell_count = adata.n_obs
|
137
144
|
adata = adata[~pd.isnull(adata.obs[config.annotation]), :]
|
138
|
-
logger.info(
|
145
|
+
logger.info(
|
146
|
+
f"Removed null annotations. Cells retained: {adata.n_obs} (initial: {initial_cell_count})."
|
147
|
+
)
|
139
148
|
|
140
149
|
# Homologs transformation
|
141
150
|
if config.homolog_file is not None:
|
142
|
-
logger.info(f
|
143
|
-
homologs = pd.read_csv(config.homolog_file, sep=
|
151
|
+
logger.info(f"------Transforming the {config.species} to HUMAN_GENE_SYM...")
|
152
|
+
homologs = pd.read_csv(config.homolog_file, sep="\t")
|
144
153
|
if homologs.shape[1] != 2:
|
145
|
-
raise ValueError(
|
154
|
+
raise ValueError(
|
155
|
+
"Homologs file must have two columns: one for the species and one for the human gene symbol."
|
156
|
+
)
|
146
157
|
|
147
|
-
homologs.columns = [config.species,
|
158
|
+
homologs.columns = [config.species, "HUMAN_GENE_SYM"]
|
148
159
|
homologs.set_index(config.species, inplace=True)
|
149
160
|
adata = adata[:, adata.var_names.isin(homologs.index)]
|
150
161
|
logger.info(f"{adata.shape[1]} genes retained after homolog transformation.")
|
151
162
|
if adata.shape[1] < 100:
|
152
163
|
raise ValueError("Too few genes retained in ST data (<100).")
|
153
|
-
adata.var_names = homologs.loc[adata.var_names,
|
164
|
+
adata.var_names = homologs.loc[adata.var_names, "HUMAN_GENE_SYM"].values
|
154
165
|
adata = adata[:, ~adata.var_names.duplicated()]
|
155
166
|
|
156
|
-
# Create mappings
|
157
|
-
n_cells = adata.n_obs
|
158
|
-
n_genes = adata.n_vars
|
159
|
-
|
160
167
|
if config.annotation is not None:
|
161
168
|
cell_annotations = adata.obs[config.annotation].values
|
162
|
-
logger.info(f
|
169
|
+
logger.info(f"Using cell annotations for {len(cell_annotations)} cells.")
|
163
170
|
else:
|
164
171
|
cell_annotations = None
|
165
172
|
|
166
173
|
# Build the spatial graph
|
167
|
-
logger.info(
|
174
|
+
logger.info("------Building the spatial graph...")
|
168
175
|
spatial_net_dict = build_spatial_net(adata, config.annotation, config.num_neighbour_spatial)
|
169
|
-
logger.info(
|
176
|
+
logger.info("Spatial graph built successfully.")
|
170
177
|
|
171
178
|
# Extract the latent representation
|
172
|
-
logger.info(
|
179
|
+
logger.info("------Extracting the latent representation...")
|
173
180
|
coor_latent = adata.obsm[config.latent_representation]
|
174
181
|
coor_latent = coor_latent.astype(np.float32)
|
175
|
-
logger.info(
|
182
|
+
logger.info("Latent representation extracted.")
|
176
183
|
|
177
184
|
# Geometric mean across slices
|
178
185
|
gM = None
|
186
|
+
frac_whole = None
|
179
187
|
if config.gM_slices is not None:
|
180
|
-
logger.info(
|
188
|
+
logger.info("Geometrical mean across multiple slices is provided.")
|
181
189
|
gM_df = pd.read_parquet(config.gM_slices)
|
182
190
|
if config.species is not None:
|
183
|
-
homologs = pd.read_csv(config.homolog_file, sep=
|
191
|
+
homologs = pd.read_csv(config.homolog_file, sep="\t")
|
184
192
|
if homologs.shape[1] < 2:
|
185
|
-
raise ValueError(
|
186
|
-
|
193
|
+
raise ValueError(
|
194
|
+
"Homologs file must have at least two columns: one for the species and one for the human gene symbol."
|
195
|
+
)
|
196
|
+
homologs.columns = [config.species, "HUMAN_GENE_SYM"]
|
187
197
|
homologs.set_index(config.species, inplace=True)
|
188
198
|
gM_df = gM_df.loc[gM_df.index.isin(homologs.index)]
|
189
|
-
gM_df.index = homologs.loc[gM_df.index,
|
199
|
+
gM_df.index = homologs.loc[gM_df.index, "HUMAN_GENE_SYM"].values
|
190
200
|
common_genes = np.intersect1d(adata.var_names, gM_df.index)
|
191
201
|
gM_df = gM_df.loc[common_genes]
|
192
|
-
gM = gM_df[
|
202
|
+
gM = gM_df["G_Mean"].values
|
203
|
+
frac_whole = gM_df["frac"].values
|
193
204
|
adata = adata[:, common_genes]
|
194
|
-
logger.info(
|
205
|
+
logger.info(
|
206
|
+
f"{len(common_genes)} common genes retained after loading the cross slice geometric mean."
|
207
|
+
)
|
195
208
|
|
196
209
|
# Compute ranks after taking common genes with gM_slices
|
197
|
-
logger.info(
|
210
|
+
logger.info("------Ranking the spatial data...")
|
198
211
|
if not scipy.sparse.issparse(adata.X):
|
199
212
|
adata_X = scipy.sparse.csr_matrix(adata.X)
|
200
213
|
elif isinstance(adata.X, scipy.sparse.csr_matrix):
|
@@ -202,51 +215,70 @@ def run_latent_to_gene(config: LatentToGeneConfig):
|
|
202
215
|
else:
|
203
216
|
adata_X = adata.X.tocsr()
|
204
217
|
|
205
|
-
|
218
|
+
# Create mappings
|
219
|
+
n_cells = adata.n_obs
|
220
|
+
n_genes = adata.n_vars
|
206
221
|
|
222
|
+
ranks = np.zeros((n_cells, adata.n_vars), dtype=np.float16)
|
207
223
|
for i in tqdm(range(n_cells), desc="Computing ranks per cell"):
|
208
224
|
data = adata_X[i, :].toarray().flatten()
|
209
|
-
ranks[i, :] = rankdata(data, method=
|
225
|
+
ranks[i, :] = rankdata(data, method="average")
|
210
226
|
|
211
227
|
if gM is None:
|
212
228
|
gM = gmean(ranks, axis=0)
|
229
|
+
gM = gM.astype(np.float16)
|
213
230
|
|
214
|
-
# Compute the fraction of each gene across cells
|
215
231
|
adata_X_bool = adata_X.astype(bool)
|
216
|
-
frac_whole
|
217
|
-
|
232
|
+
if frac_whole is None:
|
233
|
+
# Compute the fraction of each gene across cells
|
234
|
+
frac_whole = np.asarray(adata_X_bool.sum(axis=0)).flatten() / n_cells
|
235
|
+
logger.info("Gene expression proportion of each gene across cells computed.")
|
236
|
+
else:
|
237
|
+
logger.info(
|
238
|
+
"Gene expression proportion of each gene across cells in all sections has been provided."
|
239
|
+
)
|
218
240
|
|
241
|
+
frac_whole += 1e-12 # Avoid division by zero
|
219
242
|
# Normalize the ranks
|
220
243
|
ranks /= gM
|
221
244
|
|
222
|
-
# Compute marker scores in parallel
|
223
|
-
logger.info('------Computing marker scores...')
|
224
245
|
def compute_mk_score_wrapper(cell_pos):
|
225
246
|
return compute_regional_mkscore(
|
226
|
-
cell_pos,
|
247
|
+
cell_pos,
|
248
|
+
spatial_net_dict,
|
249
|
+
coor_latent,
|
250
|
+
config,
|
251
|
+
cell_annotations,
|
252
|
+
ranks,
|
253
|
+
frac_whole,
|
254
|
+
adata_X_bool,
|
227
255
|
)
|
228
256
|
|
229
|
-
|
230
|
-
mk_score = np.
|
231
|
-
|
257
|
+
logger.info("------Computing marker scores...")
|
258
|
+
mk_score = np.zeros((n_cells, n_genes), dtype=np.float16)
|
259
|
+
for cell_pos in trange(n_cells, desc="Calculating marker scores"):
|
260
|
+
mk_score[cell_pos, :] = compute_mk_score_wrapper(cell_pos)
|
261
|
+
|
262
|
+
mk_score = mk_score.T
|
263
|
+
logger.info("Marker scores computed.")
|
232
264
|
|
233
265
|
# Remove mitochondrial genes
|
234
266
|
gene_names = adata.var_names.values.astype(str)
|
235
|
-
mt_gene_mask = ~(np.char.startswith(gene_names,
|
267
|
+
mt_gene_mask = ~(np.char.startswith(gene_names, "MT-") | np.char.startswith(gene_names, "mt-"))
|
236
268
|
mk_score = mk_score[mt_gene_mask, :]
|
237
269
|
gene_names = gene_names[mt_gene_mask]
|
238
|
-
logger.info(f
|
270
|
+
logger.info(f"Removed mitochondrial genes. Remaining genes: {len(gene_names)}.")
|
239
271
|
|
240
272
|
# Save the marker scores
|
241
|
-
logger.info(
|
273
|
+
logger.info("------Saving marker scores ...")
|
242
274
|
output_file_path = Path(config.mkscore_feather_path)
|
243
275
|
output_file_path.parent.mkdir(parents=True, exist_ok=True, mode=0o755)
|
244
276
|
mk_score_df = pd.DataFrame(mk_score, index=gene_names, columns=adata.obs_names)
|
245
277
|
mk_score_df.reset_index(inplace=True)
|
246
|
-
mk_score_df.rename(columns={
|
278
|
+
mk_score_df.rename(columns={"index": "HUMAN_GENE_SYM"}, inplace=True)
|
247
279
|
mk_score_df.to_feather(output_file_path)
|
248
|
-
logger.info(f
|
280
|
+
logger.info(f"Marker scores saved to {output_file_path}.")
|
249
281
|
|
250
282
|
# Save the modified adata object to disk
|
251
283
|
adata.write(config.hdf5_with_latent_path)
|
252
|
-
logger.info(f
|
284
|
+
logger.info(f"Modified adata object saved to {config.hdf5_with_latent_path}.")
|
gsMap/main.py
CHANGED
@@ -1,5 +1,8 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
import argparse
|
2
|
+
|
3
|
+
from gsMap import __version__
|
4
|
+
from gsMap.config import cli_function_registry
|
5
|
+
|
3
6
|
|
4
7
|
def main():
|
5
8
|
parser = create_parser()
|
@@ -7,21 +10,27 @@ def main():
|
|
7
10
|
if args.subcommand is None:
|
8
11
|
parser.print_help()
|
9
12
|
exit(1)
|
10
|
-
args.func(
|
11
|
-
|
12
|
-
)
|
13
|
+
args.func(args)
|
14
|
+
|
13
15
|
|
14
16
|
def create_parser():
|
15
|
-
parser = argparse.ArgumentParser(
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
17
|
+
parser = argparse.ArgumentParser(
|
18
|
+
description=" gsMap: genetically informed spatial mapping of cells for complex traits",
|
19
|
+
formatter_class=argparse.RawTextHelpFormatter,
|
20
|
+
prog="gsMap",
|
21
|
+
)
|
22
|
+
parser.add_argument(
|
23
|
+
"--version", "-v", action="version", version=f"gsMap version {__version__}"
|
24
|
+
)
|
25
|
+
subparsers = parser.add_subparsers(
|
26
|
+
dest="subcommand", help="Subcommands", title="Available subcommands"
|
27
|
+
)
|
21
28
|
for subcommand in cli_function_registry.values():
|
22
|
-
subcommand_parser = subparsers.add_parser(
|
23
|
-
|
24
|
-
|
29
|
+
subcommand_parser = subparsers.add_parser(
|
30
|
+
subcommand.name,
|
31
|
+
help=subcommand.description,
|
32
|
+
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
33
|
+
)
|
25
34
|
subcommand.add_args_function(subcommand_parser)
|
26
35
|
subcommand_parser.set_defaults(func=subcommand.func)
|
27
36
|
return parser
|
gsMap/report.py
CHANGED
@@ -16,16 +16,17 @@ logger = logging.getLogger(__name__)
|
|
16
16
|
try:
|
17
17
|
from importlib.resources import files
|
18
18
|
|
19
|
-
template_dir = files(
|
19
|
+
template_dir = files("gsMap").joinpath("templates")
|
20
20
|
except (ImportError, FileNotFoundError):
|
21
21
|
# Fallback to a relative path if running in development mode
|
22
|
-
template_dir = os.path.join(os.path.dirname(__file__),
|
22
|
+
template_dir = os.path.join(os.path.dirname(__file__), "templates")
|
23
23
|
|
24
24
|
# Set up Jinja2 environment
|
25
25
|
env = Environment(loader=FileSystemLoader(template_dir))
|
26
26
|
|
27
27
|
# Load the template
|
28
|
-
template = env.get_template(
|
28
|
+
template = env.get_template("report_template.html")
|
29
|
+
|
29
30
|
|
30
31
|
def copy_files_to_report_dir(result_dir, report_dir, files_to_copy):
|
31
32
|
"""Copy specified files (HTML or PNG) to the report directory."""
|
@@ -36,28 +37,30 @@ def copy_files_to_report_dir(result_dir, report_dir, files_to_copy):
|
|
36
37
|
|
37
38
|
def load_cauchy_table(csv_file):
|
38
39
|
"""Load the Cauchy combination table from a compressed CSV file using Pandas."""
|
39
|
-
df = pd.read_csv(csv_file, compression=
|
40
|
-
table_data = df[[
|
40
|
+
df = pd.read_csv(csv_file, compression="gzip")
|
41
|
+
table_data = df[["annotation", "p_cauchy", "p_median"]].to_dict(orient="records")
|
41
42
|
return table_data
|
42
43
|
|
43
44
|
|
44
45
|
def load_gene_diagnostic_info(csv_file):
|
45
46
|
"""Load the Gene Diagnostic Info CSV file and return the top 50 rows."""
|
46
47
|
df = pd.read_csv(csv_file)
|
47
|
-
top_50 = df.head(50).to_dict(orient=
|
48
|
+
top_50 = df.head(50).to_dict(orient="records")
|
48
49
|
return top_50
|
49
50
|
|
50
51
|
|
51
52
|
def embed_html_content(file_path):
|
52
53
|
"""Read the content of an HTML file and return it as a string."""
|
53
|
-
with open(file_path
|
54
|
+
with open(file_path) as f:
|
54
55
|
return f.read()
|
55
56
|
|
57
|
+
|
56
58
|
def check_and_run_cauchy_combination(config):
|
57
59
|
cauchy_result_file = config.get_cauchy_result_file(config.trait_name)
|
58
60
|
if cauchy_result_file.exists():
|
59
61
|
logger.info(
|
60
|
-
f"Cauchy combination already done for trait {config.trait_name}. Results saved at {cauchy_result_file}. Skipping..."
|
62
|
+
f"Cauchy combination already done for trait {config.trait_name}. Results saved at {cauchy_result_file}. Skipping..."
|
63
|
+
)
|
61
64
|
else:
|
62
65
|
logger.info(f"Running Cauchy combination for trait {config.trait_name}...")
|
63
66
|
cauchy_config = CauchyCombinationConfig(
|
@@ -68,16 +71,16 @@ def check_and_run_cauchy_combination(config):
|
|
68
71
|
)
|
69
72
|
run_Cauchy_combination(cauchy_config)
|
70
73
|
|
71
|
-
df = pd.read_csv(cauchy_result_file, compression=
|
72
|
-
table_data = df[[
|
74
|
+
df = pd.read_csv(cauchy_result_file, compression="gzip")
|
75
|
+
table_data = df[["annotation", "p_cauchy", "p_median"]].to_dict(orient="records")
|
73
76
|
|
74
77
|
return table_data
|
75
78
|
|
76
|
-
def run_report(config: ReportConfig, run_parameters=None):
|
77
79
|
|
78
|
-
|
80
|
+
def run_report(config: ReportConfig, run_parameters=None):
|
81
|
+
logger.info("Running gsMap Diagnosis Module")
|
79
82
|
run_Diagnosis(config)
|
80
|
-
logger.info(
|
83
|
+
logger.info("gsMap Diagnosis running successfully")
|
81
84
|
|
82
85
|
report_dir = config.get_report_dir(config.trait_name)
|
83
86
|
gene_diagnostic_info_file = config.get_gene_diagnostic_info_save_path(config.trait_name)
|
@@ -90,19 +93,27 @@ def run_report(config: ReportConfig, run_parameters=None):
|
|
90
93
|
gss_distribution_dir = config.get_GSS_plot_dir(config.trait_name)
|
91
94
|
|
92
95
|
gene_plots = []
|
93
|
-
plot_select_gene_list =
|
96
|
+
plot_select_gene_list = (
|
97
|
+
config.get_GSS_plot_select_gene_file(config.trait_name).read_text().splitlines()
|
98
|
+
)
|
94
99
|
for gene_name in plot_select_gene_list:
|
95
|
-
expression_png =
|
100
|
+
expression_png = (
|
101
|
+
gss_distribution_dir / f"{config.sample_name}_{gene_name}_Expression_Distribution.png"
|
102
|
+
)
|
96
103
|
gss_png = gss_distribution_dir / f"{config.sample_name}_{gene_name}_GSS_Distribution.png"
|
97
104
|
# check if expression and GSS plots exist
|
98
105
|
if not os.path.exists(expression_png) or not os.path.exists(gss_png):
|
99
106
|
print(f"Skipping gene {gene_name} as expression or GSS plot is missing.")
|
100
107
|
continue
|
101
|
-
gene_plots.append(
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
108
|
+
gene_plots.append(
|
109
|
+
{
|
110
|
+
"name": gene_name,
|
111
|
+
"expression_plot": expression_png.relative_to(
|
112
|
+
report_dir
|
113
|
+
), # Path for gene expression plot
|
114
|
+
"gss_plot": gss_png.relative_to(report_dir), # Path for GSS distribution plot
|
115
|
+
}
|
116
|
+
)
|
106
117
|
|
107
118
|
# # Copy PNG files to the report directory
|
108
119
|
# copy_files_to_report_dir(result_dir, report_dir, [gene['expression_plot'] for gene in gene_plots] + [gene['gss_plot'] for gene in gene_plots])
|
@@ -115,7 +126,9 @@ def run_report(config: ReportConfig, run_parameters=None):
|
|
115
126
|
# Sample data for other report components
|
116
127
|
title = f"{config.sample_name} Genetic Spatial Mapping Report"
|
117
128
|
|
118
|
-
genetic_mapping_plot = embed_html_content(
|
129
|
+
genetic_mapping_plot = embed_html_content(
|
130
|
+
config.get_gsMap_html_plot_save_path(config.trait_name)
|
131
|
+
)
|
119
132
|
manhattan_plot = embed_html_content(config.get_manhattan_html_plot_path(config.trait_name))
|
120
133
|
|
121
134
|
gsmap_version = gsMap.__version__
|
@@ -133,13 +146,12 @@ def run_report(config: ReportConfig, run_parameters=None):
|
|
133
146
|
"Report Directory": config.get_report_dir(trait_name),
|
134
147
|
"gsMap Report File": config.get_gsMap_report_file(trait_name),
|
135
148
|
"Gene Diagnostic Info File": config.get_gene_diagnostic_info_save_path(trait_name),
|
136
|
-
"Report Generation Date": pd.Timestamp.now().strftime(
|
149
|
+
"Report Generation Date": pd.Timestamp.now().strftime("%Y-%m-%d %H:%M:%S"),
|
137
150
|
}
|
138
151
|
|
139
152
|
if run_parameters is not None:
|
140
153
|
default_run_parameters.update(run_parameters)
|
141
154
|
|
142
|
-
|
143
155
|
output_html = template.render(
|
144
156
|
title=title,
|
145
157
|
genetic_mapping_plot=genetic_mapping_plot, # Inlined genetic mapping plot
|
@@ -148,7 +160,7 @@ def run_report(config: ReportConfig, run_parameters=None):
|
|
148
160
|
gene_plots=gene_plots, # List of PNG paths for gene plots
|
149
161
|
gsmap_version=gsmap_version,
|
150
162
|
parameters=default_run_parameters, # Pass the run parameters to the template
|
151
|
-
gene_diagnostic_info=gene_diagnostic_info # Include top 50 gene diagnostic info rows
|
163
|
+
gene_diagnostic_info=gene_diagnostic_info, # Include top 50 gene diagnostic info rows
|
152
164
|
)
|
153
165
|
|
154
166
|
# Save the generated HTML report in the 'report' directory
|
@@ -157,4 +169,6 @@ def run_report(config: ReportConfig, run_parameters=None):
|
|
157
169
|
f.write(output_html)
|
158
170
|
|
159
171
|
logger.info(f"Report generated successfully! Saved at {report_file}.")
|
160
|
-
logger.info(
|
172
|
+
logger.info(
|
173
|
+
"Copy the report directory to your local PC and open the HTML report file in a web browser to view the report."
|
174
|
+
)
|