gsMap 1.67__py3-none-any.whl → 1.70__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gsMap/{GNN_VAE → GNN}/__init__.py +0 -0
- gsMap/{GNN_VAE → GNN}/adjacency_matrix.py +75 -75
- gsMap/{GNN_VAE → GNN}/model.py +89 -89
- gsMap/{GNN_VAE → GNN}/train.py +88 -86
- gsMap/__init__.py +5 -5
- gsMap/__main__.py +2 -2
- gsMap/cauchy_combination_test.py +141 -141
- gsMap/config.py +805 -803
- gsMap/diagnosis.py +273 -273
- gsMap/find_latent_representation.py +133 -145
- gsMap/format_sumstats.py +407 -407
- gsMap/generate_ldscore.py +618 -618
- gsMap/latent_to_gene.py +234 -234
- gsMap/main.py +31 -31
- gsMap/report.py +160 -160
- gsMap/run_all_mode.py +194 -194
- gsMap/setup.py +0 -0
- gsMap/spatial_ldsc_multiple_sumstats.py +380 -380
- gsMap/templates/report_template.html +198 -198
- gsMap/utils/__init__.py +0 -0
- gsMap/utils/generate_r2_matrix.py +735 -735
- gsMap/utils/jackknife.py +514 -514
- gsMap/utils/make_annotations.py +518 -518
- gsMap/utils/manhattan_plot.py +639 -639
- gsMap/utils/regression_read.py +294 -294
- gsMap/visualize.py +198 -198
- {gsmap-1.67.dist-info → gsmap-1.70.dist-info}/LICENSE +21 -21
- {gsmap-1.67.dist-info → gsmap-1.70.dist-info}/METADATA +28 -22
- gsmap-1.70.dist-info/RECORD +31 -0
- gsmap-1.67.dist-info/RECORD +0 -31
- {gsmap-1.67.dist-info → gsmap-1.70.dist-info}/WHEEL +0 -0
- {gsmap-1.67.dist-info → gsmap-1.70.dist-info}/entry_points.txt +0 -0
gsMap/report.py
CHANGED
@@ -1,160 +1,160 @@
|
|
1
|
-
import logging
|
2
|
-
import os
|
3
|
-
import shutil
|
4
|
-
|
5
|
-
import pandas as pd
|
6
|
-
from jinja2 import Environment, FileSystemLoader
|
7
|
-
|
8
|
-
import gsMap
|
9
|
-
from gsMap.cauchy_combination_test import run_Cauchy_combination
|
10
|
-
from gsMap.config import CauchyCombinationConfig, ReportConfig
|
11
|
-
from gsMap.diagnosis import run_Diagnosis
|
12
|
-
|
13
|
-
logger = logging.getLogger(__name__)
|
14
|
-
|
15
|
-
# Load the Jinja2 environment
|
16
|
-
try:
|
17
|
-
from importlib.resources import files
|
18
|
-
|
19
|
-
template_dir = files('gsMap').joinpath('templates')
|
20
|
-
except (ImportError, FileNotFoundError):
|
21
|
-
# Fallback to a relative path if running in development mode
|
22
|
-
template_dir = os.path.join(os.path.dirname(__file__), 'templates')
|
23
|
-
|
24
|
-
# Set up Jinja2 environment
|
25
|
-
env = Environment(loader=FileSystemLoader(template_dir))
|
26
|
-
|
27
|
-
# Load the template
|
28
|
-
template = env.get_template('report_template.html')
|
29
|
-
|
30
|
-
def copy_files_to_report_dir(result_dir, report_dir, files_to_copy):
|
31
|
-
"""Copy specified files (HTML or PNG) to the report directory."""
|
32
|
-
os.makedirs(report_dir, exist_ok=True)
|
33
|
-
for file in files_to_copy:
|
34
|
-
shutil.copy2(file, os.path.join(report_dir, os.path.basename(file)))
|
35
|
-
|
36
|
-
|
37
|
-
def load_cauchy_table(csv_file):
|
38
|
-
"""Load the Cauchy combination table from a compressed CSV file using Pandas."""
|
39
|
-
df = pd.read_csv(csv_file, compression='gzip')
|
40
|
-
table_data = df[['annotation', 'p_cauchy', 'p_median']].to_dict(orient='records')
|
41
|
-
return table_data
|
42
|
-
|
43
|
-
|
44
|
-
def load_gene_diagnostic_info(csv_file):
|
45
|
-
"""Load the Gene Diagnostic Info CSV file and return the top 50 rows."""
|
46
|
-
df = pd.read_csv(csv_file)
|
47
|
-
top_50 = df.head(50).to_dict(orient='records')
|
48
|
-
return top_50
|
49
|
-
|
50
|
-
|
51
|
-
def embed_html_content(file_path):
|
52
|
-
"""Read the content of an HTML file and return it as a string."""
|
53
|
-
with open(file_path, 'r') as f:
|
54
|
-
return f.read()
|
55
|
-
|
56
|
-
def check_and_run_cauchy_combination(config):
|
57
|
-
cauchy_result_file = config.get_cauchy_result_file(config.trait_name)
|
58
|
-
if cauchy_result_file.exists():
|
59
|
-
logger.info(
|
60
|
-
f"Cauchy combination already done for trait {config.trait_name}. Results saved at {cauchy_result_file}. Skipping...")
|
61
|
-
else:
|
62
|
-
logger.info(f"Running Cauchy combination for trait {config.trait_name}...")
|
63
|
-
cauchy_config = CauchyCombinationConfig(
|
64
|
-
workdir=config.workdir,
|
65
|
-
sample_name=config.sample_name,
|
66
|
-
annotation=config.annotation,
|
67
|
-
trait_name=config.trait_name,
|
68
|
-
)
|
69
|
-
run_Cauchy_combination(cauchy_config)
|
70
|
-
|
71
|
-
df = pd.read_csv(cauchy_result_file, compression='gzip')
|
72
|
-
table_data = df[['annotation', 'p_cauchy', 'p_median']].to_dict(orient='records')
|
73
|
-
|
74
|
-
return table_data
|
75
|
-
|
76
|
-
def run_report(config: ReportConfig, run_parameters=None):
|
77
|
-
|
78
|
-
logger.info('Running gsMap Diagnosis Module')
|
79
|
-
run_Diagnosis(config)
|
80
|
-
logger.info('gsMap Diagnosis running successfully')
|
81
|
-
|
82
|
-
report_dir = config.get_report_dir(config.trait_name)
|
83
|
-
gene_diagnostic_info_file = config.get_gene_diagnostic_info_save_path(config.trait_name)
|
84
|
-
gene_diagnostic_info = load_gene_diagnostic_info(gene_diagnostic_info_file)
|
85
|
-
|
86
|
-
# Load data (Cauchy table and gene diagnostic info)
|
87
|
-
cauchy_table = check_and_run_cauchy_combination(config)
|
88
|
-
|
89
|
-
# Paths to PNGs for gene expression and GSS distribution
|
90
|
-
gss_distribution_dir = config.get_GSS_plot_dir(config.trait_name)
|
91
|
-
|
92
|
-
gene_plots = []
|
93
|
-
plot_select_gene_list = config.get_GSS_plot_select_gene_file(config.trait_name).read_text().splitlines()
|
94
|
-
for gene_name in plot_select_gene_list:
|
95
|
-
expression_png = gss_distribution_dir / f"{config.sample_name}_{gene_name}_Expression_Distribution.png"
|
96
|
-
gss_png = gss_distribution_dir / f"{config.sample_name}_{gene_name}_GSS_Distribution.png"
|
97
|
-
# check if expression and GSS plots exist
|
98
|
-
if not os.path.exists(expression_png) or not os.path.exists(gss_png):
|
99
|
-
print(f"Skipping gene {gene_name} as expression or GSS plot is missing.")
|
100
|
-
continue
|
101
|
-
gene_plots.append({
|
102
|
-
'name': gene_name,
|
103
|
-
'expression_plot': expression_png.relative_to(report_dir), # Path for gene expression plot
|
104
|
-
'gss_plot': gss_png.relative_to(report_dir) # Path for GSS distribution plot
|
105
|
-
})
|
106
|
-
|
107
|
-
# # Copy PNG files to the report directory
|
108
|
-
# copy_files_to_report_dir(result_dir, report_dir, [gene['expression_plot'] for gene in gene_plots] + [gene['gss_plot'] for gene in gene_plots])
|
109
|
-
|
110
|
-
# Update paths to point to copied images inside the report folder
|
111
|
-
# for gene in gene_plots:
|
112
|
-
# gene['expression_plot'] = os.path.join(os.path.basename(gene['expression_plot']))
|
113
|
-
# gene['gss_plot'] = os.path.join(os.path.basename(gene['gss_plot']))
|
114
|
-
|
115
|
-
# Sample data for other report components
|
116
|
-
title = f"{config.sample_name} Genetic Spatial Mapping Report"
|
117
|
-
|
118
|
-
genetic_mapping_plot = embed_html_content(config.get_gsMap_html_plot_save_path(config.trait_name))
|
119
|
-
manhattan_plot = embed_html_content(config.get_manhattan_html_plot_path(config.trait_name))
|
120
|
-
|
121
|
-
gsmap_version = gsMap.__version__
|
122
|
-
# Render the template with dynamic content, including the run parameters
|
123
|
-
|
124
|
-
trait_name = config.trait_name
|
125
|
-
default_run_parameters = {
|
126
|
-
"Sample Name": config.sample_name,
|
127
|
-
"Trait Name": trait_name,
|
128
|
-
"Summary Statistics File": config.sumstats_file,
|
129
|
-
"HDF5 Path": config.hdf5_with_latent_path,
|
130
|
-
"Annotation": config.annotation,
|
131
|
-
"Spatial LDSC Save Directory": config.ldsc_save_dir,
|
132
|
-
"Cauchy Directory": config.cauchy_save_dir,
|
133
|
-
"Report Directory": config.get_report_dir(trait_name),
|
134
|
-
"gsMap Report File": config.get_gsMap_report_file(trait_name),
|
135
|
-
"Gene Diagnostic Info File": config.get_gene_diagnostic_info_save_path(trait_name),
|
136
|
-
"Report Generation Date": pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S'),
|
137
|
-
}
|
138
|
-
|
139
|
-
if run_parameters is not None:
|
140
|
-
default_run_parameters.update(run_parameters)
|
141
|
-
|
142
|
-
|
143
|
-
output_html = template.render(
|
144
|
-
title=title,
|
145
|
-
genetic_mapping_plot=genetic_mapping_plot, # Inlined genetic mapping plot
|
146
|
-
manhattan_plot=manhattan_plot, # Inlined Manhattan plot
|
147
|
-
cauchy_table=cauchy_table,
|
148
|
-
gene_plots=gene_plots, # List of PNG paths for gene plots
|
149
|
-
gsmap_version=gsmap_version,
|
150
|
-
parameters=default_run_parameters, # Pass the run parameters to the template
|
151
|
-
gene_diagnostic_info=gene_diagnostic_info # Include top 50 gene diagnostic info rows
|
152
|
-
)
|
153
|
-
|
154
|
-
# Save the generated HTML report in the 'report' directory
|
155
|
-
report_file = config.get_gsMap_report_file(config.trait_name)
|
156
|
-
with open(report_file, "w") as f:
|
157
|
-
f.write(output_html)
|
158
|
-
|
159
|
-
logger.info(f"Report generated successfully! Saved at {report_file}.")
|
160
|
-
logger.info(f"Copy the report directory to your local PC and open the HTML report file in a web browser to view the report.")
|
1
|
+
import logging
|
2
|
+
import os
|
3
|
+
import shutil
|
4
|
+
|
5
|
+
import pandas as pd
|
6
|
+
from jinja2 import Environment, FileSystemLoader
|
7
|
+
|
8
|
+
import gsMap
|
9
|
+
from gsMap.cauchy_combination_test import run_Cauchy_combination
|
10
|
+
from gsMap.config import CauchyCombinationConfig, ReportConfig
|
11
|
+
from gsMap.diagnosis import run_Diagnosis
|
12
|
+
|
13
|
+
logger = logging.getLogger(__name__)
|
14
|
+
|
15
|
+
# Load the Jinja2 environment
|
16
|
+
try:
|
17
|
+
from importlib.resources import files
|
18
|
+
|
19
|
+
template_dir = files('gsMap').joinpath('templates')
|
20
|
+
except (ImportError, FileNotFoundError):
|
21
|
+
# Fallback to a relative path if running in development mode
|
22
|
+
template_dir = os.path.join(os.path.dirname(__file__), 'templates')
|
23
|
+
|
24
|
+
# Set up Jinja2 environment
|
25
|
+
env = Environment(loader=FileSystemLoader(template_dir))
|
26
|
+
|
27
|
+
# Load the template
|
28
|
+
template = env.get_template('report_template.html')
|
29
|
+
|
30
|
+
def copy_files_to_report_dir(result_dir, report_dir, files_to_copy):
|
31
|
+
"""Copy specified files (HTML or PNG) to the report directory."""
|
32
|
+
os.makedirs(report_dir, exist_ok=True)
|
33
|
+
for file in files_to_copy:
|
34
|
+
shutil.copy2(file, os.path.join(report_dir, os.path.basename(file)))
|
35
|
+
|
36
|
+
|
37
|
+
def load_cauchy_table(csv_file):
|
38
|
+
"""Load the Cauchy combination table from a compressed CSV file using Pandas."""
|
39
|
+
df = pd.read_csv(csv_file, compression='gzip')
|
40
|
+
table_data = df[['annotation', 'p_cauchy', 'p_median']].to_dict(orient='records')
|
41
|
+
return table_data
|
42
|
+
|
43
|
+
|
44
|
+
def load_gene_diagnostic_info(csv_file):
|
45
|
+
"""Load the Gene Diagnostic Info CSV file and return the top 50 rows."""
|
46
|
+
df = pd.read_csv(csv_file)
|
47
|
+
top_50 = df.head(50).to_dict(orient='records')
|
48
|
+
return top_50
|
49
|
+
|
50
|
+
|
51
|
+
def embed_html_content(file_path):
|
52
|
+
"""Read the content of an HTML file and return it as a string."""
|
53
|
+
with open(file_path, 'r') as f:
|
54
|
+
return f.read()
|
55
|
+
|
56
|
+
def check_and_run_cauchy_combination(config):
|
57
|
+
cauchy_result_file = config.get_cauchy_result_file(config.trait_name)
|
58
|
+
if cauchy_result_file.exists():
|
59
|
+
logger.info(
|
60
|
+
f"Cauchy combination already done for trait {config.trait_name}. Results saved at {cauchy_result_file}. Skipping...")
|
61
|
+
else:
|
62
|
+
logger.info(f"Running Cauchy combination for trait {config.trait_name}...")
|
63
|
+
cauchy_config = CauchyCombinationConfig(
|
64
|
+
workdir=config.workdir,
|
65
|
+
sample_name=config.sample_name,
|
66
|
+
annotation=config.annotation,
|
67
|
+
trait_name=config.trait_name,
|
68
|
+
)
|
69
|
+
run_Cauchy_combination(cauchy_config)
|
70
|
+
|
71
|
+
df = pd.read_csv(cauchy_result_file, compression='gzip')
|
72
|
+
table_data = df[['annotation', 'p_cauchy', 'p_median']].to_dict(orient='records')
|
73
|
+
|
74
|
+
return table_data
|
75
|
+
|
76
|
+
def run_report(config: ReportConfig, run_parameters=None):
|
77
|
+
|
78
|
+
logger.info('Running gsMap Diagnosis Module')
|
79
|
+
run_Diagnosis(config)
|
80
|
+
logger.info('gsMap Diagnosis running successfully')
|
81
|
+
|
82
|
+
report_dir = config.get_report_dir(config.trait_name)
|
83
|
+
gene_diagnostic_info_file = config.get_gene_diagnostic_info_save_path(config.trait_name)
|
84
|
+
gene_diagnostic_info = load_gene_diagnostic_info(gene_diagnostic_info_file)
|
85
|
+
|
86
|
+
# Load data (Cauchy table and gene diagnostic info)
|
87
|
+
cauchy_table = check_and_run_cauchy_combination(config)
|
88
|
+
|
89
|
+
# Paths to PNGs for gene expression and GSS distribution
|
90
|
+
gss_distribution_dir = config.get_GSS_plot_dir(config.trait_name)
|
91
|
+
|
92
|
+
gene_plots = []
|
93
|
+
plot_select_gene_list = config.get_GSS_plot_select_gene_file(config.trait_name).read_text().splitlines()
|
94
|
+
for gene_name in plot_select_gene_list:
|
95
|
+
expression_png = gss_distribution_dir / f"{config.sample_name}_{gene_name}_Expression_Distribution.png"
|
96
|
+
gss_png = gss_distribution_dir / f"{config.sample_name}_{gene_name}_GSS_Distribution.png"
|
97
|
+
# check if expression and GSS plots exist
|
98
|
+
if not os.path.exists(expression_png) or not os.path.exists(gss_png):
|
99
|
+
print(f"Skipping gene {gene_name} as expression or GSS plot is missing.")
|
100
|
+
continue
|
101
|
+
gene_plots.append({
|
102
|
+
'name': gene_name,
|
103
|
+
'expression_plot': expression_png.relative_to(report_dir), # Path for gene expression plot
|
104
|
+
'gss_plot': gss_png.relative_to(report_dir) # Path for GSS distribution plot
|
105
|
+
})
|
106
|
+
|
107
|
+
# # Copy PNG files to the report directory
|
108
|
+
# copy_files_to_report_dir(result_dir, report_dir, [gene['expression_plot'] for gene in gene_plots] + [gene['gss_plot'] for gene in gene_plots])
|
109
|
+
|
110
|
+
# Update paths to point to copied images inside the report folder
|
111
|
+
# for gene in gene_plots:
|
112
|
+
# gene['expression_plot'] = os.path.join(os.path.basename(gene['expression_plot']))
|
113
|
+
# gene['gss_plot'] = os.path.join(os.path.basename(gene['gss_plot']))
|
114
|
+
|
115
|
+
# Sample data for other report components
|
116
|
+
title = f"{config.sample_name} Genetic Spatial Mapping Report"
|
117
|
+
|
118
|
+
genetic_mapping_plot = embed_html_content(config.get_gsMap_html_plot_save_path(config.trait_name))
|
119
|
+
manhattan_plot = embed_html_content(config.get_manhattan_html_plot_path(config.trait_name))
|
120
|
+
|
121
|
+
gsmap_version = gsMap.__version__
|
122
|
+
# Render the template with dynamic content, including the run parameters
|
123
|
+
|
124
|
+
trait_name = config.trait_name
|
125
|
+
default_run_parameters = {
|
126
|
+
"Sample Name": config.sample_name,
|
127
|
+
"Trait Name": trait_name,
|
128
|
+
"Summary Statistics File": config.sumstats_file,
|
129
|
+
"HDF5 Path": config.hdf5_with_latent_path,
|
130
|
+
"Annotation": config.annotation,
|
131
|
+
"Spatial LDSC Save Directory": config.ldsc_save_dir,
|
132
|
+
"Cauchy Directory": config.cauchy_save_dir,
|
133
|
+
"Report Directory": config.get_report_dir(trait_name),
|
134
|
+
"gsMap Report File": config.get_gsMap_report_file(trait_name),
|
135
|
+
"Gene Diagnostic Info File": config.get_gene_diagnostic_info_save_path(trait_name),
|
136
|
+
"Report Generation Date": pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S'),
|
137
|
+
}
|
138
|
+
|
139
|
+
if run_parameters is not None:
|
140
|
+
default_run_parameters.update(run_parameters)
|
141
|
+
|
142
|
+
|
143
|
+
output_html = template.render(
|
144
|
+
title=title,
|
145
|
+
genetic_mapping_plot=genetic_mapping_plot, # Inlined genetic mapping plot
|
146
|
+
manhattan_plot=manhattan_plot, # Inlined Manhattan plot
|
147
|
+
cauchy_table=cauchy_table,
|
148
|
+
gene_plots=gene_plots, # List of PNG paths for gene plots
|
149
|
+
gsmap_version=gsmap_version,
|
150
|
+
parameters=default_run_parameters, # Pass the run parameters to the template
|
151
|
+
gene_diagnostic_info=gene_diagnostic_info # Include top 50 gene diagnostic info rows
|
152
|
+
)
|
153
|
+
|
154
|
+
# Save the generated HTML report in the 'report' directory
|
155
|
+
report_file = config.get_gsMap_report_file(config.trait_name)
|
156
|
+
with open(report_file, "w") as f:
|
157
|
+
f.write(output_html)
|
158
|
+
|
159
|
+
logger.info(f"Report generated successfully! Saved at {report_file}.")
|
160
|
+
logger.info(f"Copy the report directory to your local PC and open the HTML report file in a web browser to view the report.")
|