gsMap 1.62__py3-none-any.whl → 1.64__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gsMap/GNN_VAE/adjacency_matrix.py +1 -1
- gsMap/GNN_VAE/model.py +5 -5
- gsMap/GNN_VAE/train.py +1 -1
- gsMap/__init__.py +1 -1
- gsMap/cauchy_combination_test.py +14 -36
- gsMap/config.py +473 -404
- gsMap/diagnosis.py +273 -0
- gsMap/find_latent_representation.py +22 -86
- gsMap/format_sumstats.py +79 -82
- gsMap/generate_ldscore.py +145 -78
- gsMap/latent_to_gene.py +65 -104
- gsMap/main.py +1 -9
- gsMap/report.py +160 -0
- gsMap/run_all_mode.py +195 -0
- gsMap/spatial_ldsc_multiple_sumstats.py +188 -113
- gsMap/templates/report_template.html +198 -0
- gsMap/utils/__init__.py +0 -0
- gsMap/{generate_r2_matrix.py → utils/generate_r2_matrix.py} +2 -10
- gsMap/{make_annotations.py → utils/make_annotations.py} +1 -43
- gsMap/utils/manhattan_plot.py +639 -0
- gsMap/{regression_read.py → utils/regression_read.py} +1 -1
- gsMap/visualize.py +100 -55
- {gsmap-1.62.dist-info → gsmap-1.64.dist-info}/METADATA +21 -46
- gsmap-1.64.dist-info/RECORD +30 -0
- gsmap-1.62.dist-info/RECORD +0 -24
- /gsMap/{jackknife.py → utils/jackknife.py} +0 -0
- {gsmap-1.62.dist-info → gsmap-1.64.dist-info}/LICENSE +0 -0
- {gsmap-1.62.dist-info → gsmap-1.64.dist-info}/WHEEL +0 -0
- {gsmap-1.62.dist-info → gsmap-1.64.dist-info}/entry_points.txt +0 -0
gsMap/main.py
CHANGED
@@ -1,14 +1,6 @@
|
|
1
1
|
from gsMap import (__version__)
|
2
2
|
from gsMap.config import *
|
3
3
|
|
4
|
-
logger = logging.getLogger(__name__)
|
5
|
-
logger.setLevel(logging.DEBUG)
|
6
|
-
handler = logging.StreamHandler()
|
7
|
-
handler.setFormatter(logging.Formatter(
|
8
|
-
'[{asctime}] {levelname:8s} {filename} {message}', style='{'))
|
9
|
-
logger.addHandler(handler)
|
10
|
-
|
11
|
-
|
12
4
|
def main():
|
13
5
|
parser = create_parser()
|
14
6
|
args = parser.parse_args()
|
@@ -20,7 +12,7 @@ def main():
|
|
20
12
|
)
|
21
13
|
|
22
14
|
def create_parser():
|
23
|
-
parser = argparse.ArgumentParser(description=" gsMap:
|
15
|
+
parser = argparse.ArgumentParser(description=" gsMap: genetically informed spatial mapping of cells for complex traits",
|
24
16
|
formatter_class=argparse.RawTextHelpFormatter,
|
25
17
|
prog='gsMap'
|
26
18
|
)
|
gsMap/report.py
ADDED
@@ -0,0 +1,160 @@
|
|
1
|
+
import logging
|
2
|
+
import os
|
3
|
+
import shutil
|
4
|
+
|
5
|
+
import pandas as pd
|
6
|
+
from jinja2 import Environment, FileSystemLoader
|
7
|
+
|
8
|
+
import gsMap
|
9
|
+
from gsMap.cauchy_combination_test import run_Cauchy_combination
|
10
|
+
from gsMap.config import CauchyCombinationConfig, ReportConfig
|
11
|
+
from gsMap.diagnosis import run_Diagnosis
|
12
|
+
|
13
|
+
logger = logging.getLogger(__name__)

# Resolve the directory that holds the report templates. The packaged
# resources are looked up through importlib.resources first; if that import or
# lookup fails, the 'templates' folder next to this module is used instead
# (e.g. when running from a development checkout).
try:
    from importlib.resources import files

    template_dir = files('gsMap').joinpath('templates')
except (FileNotFoundError, ImportError):
    _module_dir = os.path.dirname(__file__)
    template_dir = os.path.join(_module_dir, 'templates')

# Jinja2 environment bound to the template directory, and the single report
# template rendered by run_report().
env = Environment(
    loader=FileSystemLoader(template_dir),
)
template = env.get_template('report_template.html')
|
29
|
+
|
30
|
+
def copy_files_to_report_dir(result_dir, report_dir, files_to_copy):
    """Copy the given files into ``report_dir``, creating it if necessary.

    Each file keeps its base name and is copied with ``shutil.copy2``.
    ``result_dir`` is accepted for interface compatibility but is not used.
    """
    os.makedirs(report_dir, exist_ok=True)
    for source_path in files_to_copy:
        target_path = os.path.join(report_dir, os.path.basename(source_path))
        shutil.copy2(source_path, target_path)
|
35
|
+
|
36
|
+
|
37
|
+
def load_cauchy_table(csv_file):
    """Read a gzip-compressed Cauchy combination CSV into a list of row dicts.

    Only the ``annotation``, ``p_cauchy`` and ``p_median`` columns are kept;
    each returned dict represents one row.
    """
    wanted_columns = ['annotation', 'p_cauchy', 'p_median']
    cauchy_df = pd.read_csv(csv_file, compression='gzip')
    return cauchy_df[wanted_columns].to_dict(orient='records')
|
42
|
+
|
43
|
+
|
44
|
+
def load_gene_diagnostic_info(csv_file):
    """Read the gene diagnostic info CSV and return its first 50 rows.

    The rows are returned as a list of dicts, one per row, in file order.
    """
    diagnostic_df = pd.read_csv(csv_file)
    leading_rows = diagnostic_df.iloc[:50]
    return leading_rows.to_dict(orient='records')
|
49
|
+
|
50
|
+
|
51
|
+
def embed_html_content(file_path):
    """Read an HTML file and return its full content as a string.

    The file is decoded as UTF-8 explicitly, so the result does not depend on
    the platform's default locale encoding.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        return f.read()
|
55
|
+
|
56
|
+
def check_and_run_cauchy_combination(config):
    """Return the Cauchy combination table for ``config.trait_name``.

    If the result file reported by ``config.get_cauchy_result_file`` already
    exists it is reused; otherwise the Cauchy combination is run first with a
    ``CauchyCombinationConfig`` built from ``config``.

    Returns:
        A list of dicts with the ``annotation``, ``p_cauchy`` and ``p_median``
        columns of the result file, one dict per row.
    """
    cauchy_result_file = config.get_cauchy_result_file(config.trait_name)
    if cauchy_result_file.exists():
        logger.info(
            f"Cauchy combination already done for trait {config.trait_name}. Results saved at {cauchy_result_file}. Skipping...")
    else:
        logger.info(f"Running Cauchy combination for trait {config.trait_name}...")
        cauchy_config = CauchyCombinationConfig(
            workdir=config.workdir,
            sample_name=config.sample_name,
            annotation=config.annotation,
            trait_name=config.trait_name,
        )
        run_Cauchy_combination(cauchy_config)

    # Use the shared loader so the column selection is defined in one place.
    return load_cauchy_table(cauchy_result_file)
|
75
|
+
|
76
|
+
def run_report(config: ReportConfig, run_parameters=None):
    """Run the diagnosis module and render the HTML report for one trait.

    The report combines the Cauchy combination table, the leading rows of the
    gene diagnostic info, the per-gene expression/GSS plots, and the inlined
    gsMap and Manhattan HTML plots, and is written to
    ``config.get_gsMap_report_file(config.trait_name)``.

    Args:
        config: Report configuration; its ``trait_name``, ``sample_name`` and
            path getters determine every input and output location.
        run_parameters: Optional mapping of parameter label to value. Its
            entries are merged over the defaults derived from ``config``
            before being passed to the template.
    """
    logger.info('Running gsMap Diagnosis Module')
    run_Diagnosis(config)
    logger.info('gsMap Diagnosis running successfully')

    trait_name = config.trait_name
    report_dir = config.get_report_dir(trait_name)
    gene_diagnostic_info_file = config.get_gene_diagnostic_info_save_path(trait_name)
    gene_diagnostic_info = load_gene_diagnostic_info(gene_diagnostic_info_file)

    # Cauchy table (computed on demand if the result file is missing)
    cauchy_table = check_and_run_cauchy_combination(config)

    # Collect the expression / GSS distribution PNGs for the selected genes
    gss_distribution_dir = config.get_GSS_plot_dir(trait_name)

    gene_plots = []
    plot_select_gene_list = config.get_GSS_plot_select_gene_file(trait_name).read_text().splitlines()
    for gene_name in plot_select_gene_list:
        expression_png = gss_distribution_dir / f"{config.sample_name}_{gene_name}_Expression_Distribution.png"
        gss_png = gss_distribution_dir / f"{config.sample_name}_{gene_name}_GSS_Distribution.png"
        # A gene is only listed when both of its plots exist
        if not os.path.exists(expression_png) or not os.path.exists(gss_png):
            # Go through the logger (not print) so the message reaches the
            # configured log handlers like every other message in this module.
            logger.warning(f"Skipping gene {gene_name} as expression or GSS plot is missing.")
            continue
        gene_plots.append({
            'name': gene_name,
            # Paths are stored relative to the report directory so the HTML
            # keeps working when the report folder is moved as a whole.
            'expression_plot': expression_png.relative_to(report_dir),
            'gss_plot': gss_png.relative_to(report_dir),
        })

    title = f"{config.sample_name} Genetic Spatial Mapping Report"

    # The two interactive plots are inlined into the report as raw HTML
    genetic_mapping_plot = embed_html_content(config.get_gsMap_html_plot_save_path(trait_name))
    manhattan_plot = embed_html_content(config.get_manhattan_html_plot_path(trait_name))

    gsmap_version = gsMap.__version__

    default_run_parameters = {
        "Sample Name": config.sample_name,
        "Trait Name": trait_name,
        "Summary Statistics File": config.sumstats_file,
        "HDF5 Path": config.hdf5_with_latent_path,
        "Annotation": config.annotation,
        "Spatial LDSC Save Directory": config.ldsc_save_dir,
        "Cauchy Directory": config.cauchy_save_dir,
        "Report Directory": config.get_report_dir(trait_name),
        "gsMap Report File": config.get_gsMap_report_file(trait_name),
        "Gene Diagnostic Info File": config.get_gene_diagnostic_info_save_path(trait_name),
        "Report Generation Date": pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S'),
    }

    # Caller-supplied parameters take precedence over the defaults
    if run_parameters is not None:
        default_run_parameters.update(run_parameters)

    output_html = template.render(
        title=title,
        genetic_mapping_plot=genetic_mapping_plot,
        manhattan_plot=manhattan_plot,
        cauchy_table=cauchy_table,
        gene_plots=gene_plots,
        gsmap_version=gsmap_version,
        parameters=default_run_parameters,
        gene_diagnostic_info=gene_diagnostic_info,
    )

    # Write as UTF-8 explicitly: the inlined HTML plots are read as text and
    # may contain non-ASCII characters that a locale default cannot encode.
    report_file = config.get_gsMap_report_file(trait_name)
    with open(report_file, "w", encoding="utf-8") as f:
        f.write(output_html)

    logger.info(f"Report generated successfully! Saved at {report_file}.")
    logger.info("Copy the report directory to your local PC and open the HTML report file in a web browser to view the report.")
|
gsMap/run_all_mode.py
ADDED
@@ -0,0 +1,195 @@
|
|
1
|
+
import logging
|
2
|
+
import time
|
3
|
+
from pathlib import Path
|
4
|
+
|
5
|
+
from gsMap.cauchy_combination_test import run_Cauchy_combination
|
6
|
+
from gsMap.config import GenerateLDScoreConfig, SpatialLDSCConfig, LatentToGeneConfig, \
|
7
|
+
FindLatentRepresentationsConfig, CauchyCombinationConfig, RunAllModeConfig, ReportConfig
|
8
|
+
from gsMap.find_latent_representation import run_find_latent_representation
|
9
|
+
from gsMap.generate_ldscore import run_generate_ldscore
|
10
|
+
from gsMap.latent_to_gene import run_latent_to_gene
|
11
|
+
from gsMap.report import run_report
|
12
|
+
from gsMap.spatial_ldsc_multiple_sumstats import run_spatial_ldsc
|
13
|
+
|
14
|
+
|
15
|
+
|
16
|
+
def format_duration(seconds):
    """Format a duration given in seconds as ``"<hours>h <minutes>m"``.

    Hours and minutes are truncated to whole numbers; leftover seconds are
    dropped.
    """
    whole_hours, leftover = divmod(seconds, 3600)
    whole_minutes = leftover // 60
    return f"{int(whole_hours)}h {int(whole_minutes)}m"
|
20
|
+
|
21
|
+
|
22
|
+
def run_pipeline(config: RunAllModeConfig):
    """Run every gsMap step in order for one sample and all configured traits.

    Steps: (1) find latent representations, (2) latent to gene, (3) generate
    LD scores, (4) spatial LDSC per trait, (5) Cauchy combination per trait,
    (6) report generation per trait. Steps 1-5 are skipped when their output
    (or, for step 3, a ``.done`` marker file) already exists, so an
    interrupted run can be resumed.

    Log records are written to ``<workdir>/<sample_name>/gsMap_pipeline.log``.

    Args:
        config: Run-all configuration. ``config.sumstats_config_dict`` is
            iterated as a mapping from trait name to summary statistics file.
    """
    # Set up file logging
    log_file = Path(config.workdir) / config.sample_name / 'gsMap_pipeline.log'
    log_file.parent.mkdir(parents=True, exist_ok=True)
    logging.basicConfig(
        level=logging.INFO,
        format='[{asctime}] {levelname:.5s} | {name} - {message}',
        handlers=[
            logging.FileHandler(log_file),
        ],
        style='{'
    )

    logger = logging.getLogger('gsMap.pipeline')
    logger.info("Starting pipeline with configuration: %s", config)

    find_latent_config = FindLatentRepresentationsConfig(
        workdir=config.workdir,
        input_hdf5_path=config.hdf5_path,
        sample_name=config.sample_name,
        annotation=config.annotation,
        data_layer=config.data_layer
    )

    latent_to_gene_config = LatentToGeneConfig(
        workdir=config.workdir,
        sample_name=config.sample_name,
        annotation=config.annotation,
        latent_representation='latent_GVAE',
        num_neighbour=51,
        num_neighbour_spatial=201,
        homolog_file=config.homolog_file
    )

    ldscore_config = GenerateLDScoreConfig(
        workdir=config.workdir,
        sample_name=config.sample_name,
        chrom='all',
        bfile_root=config.bfile_root,
        keep_snp_root=config.keep_snp_root,
        gtf_annotation_file=config.gtffile,
        spots_per_chunk=5_000,
        baseline_annotation_dir=config.baseline_annotation_dir,
        SNP_gene_pair_dir=config.SNP_gene_pair_dir,
        ldscore_save_format='quick_mode'
    )

    pipeline_start_time = time.time()

    # Step 1: Find latent representations
    start_time = time.time()
    logger.info("Step 1: Finding latent representations")
    if Path(find_latent_config.hdf5_with_latent_path).exists():
        logger.info(
            f"Find latent representations already done. Results saved at {find_latent_config.hdf5_with_latent_path}. Skipping...")
    else:
        run_find_latent_representation(find_latent_config)
    end_time = time.time()
    logger.info(f"Step 1 completed in {format_duration(end_time - start_time)}.")

    # Step 2: Latent to gene
    start_time = time.time()
    logger.info("Step 2: Mapping latent representations to genes")
    if Path(latent_to_gene_config.mkscore_feather_path).exists():
        logger.info(
            f"Latent to gene mapping already done. Results saved at {latent_to_gene_config.mkscore_feather_path}. Skipping...")
    else:
        run_latent_to_gene(latent_to_gene_config)
    end_time = time.time()
    logger.info(f"Step 2 completed in {format_duration(end_time - start_time)}.")

    # Step 3: Generate LDScores
    start_time = time.time()
    logger.info("Step 3: Generating LDScores")

    # Completion of this step is tracked with a marker file rather than by
    # looking at the generated outputs themselves.
    ldsc_done_file = Path(ldscore_config.ldscore_save_dir) / f"{config.sample_name}_generate_ldscore.done"
    if ldsc_done_file.exists():
        logger.info(f"Basic LDScore generation already done. Results saved at {ldscore_config.ldscore_save_dir}. Skipping...")
    else:
        run_generate_ldscore(ldscore_config)
        # Create the marker only after generation returned without raising
        ldsc_done_file.touch()
    end_time = time.time()
    logger.info(f"Step 3 completed in {format_duration(end_time - start_time)}.")

    # Step 4: Spatial LDSC
    start_time = time.time()
    logger.info("Step 4: Running spatial LDSC")

    sumstats_config = config.sumstats_config_dict
    for trait_name in sumstats_config:
        logger.info("Running spatial LDSC for trait: %s", trait_name)
        # Skip traits whose spatial LDSC result file is already present
        spatial_ldsc_result_file = Path(config.ldsc_save_dir) / f"{config.sample_name}_{trait_name}.csv.gz"

        if spatial_ldsc_result_file.exists():
            logger.info(
                f"Spatial LDSC already done for trait {trait_name}. Results saved at {spatial_ldsc_result_file}. Skipping...")
            continue

        spatial_ldsc_config_trait = SpatialLDSCConfig(
            workdir=config.workdir,
            sumstats_file=sumstats_config[trait_name],
            trait_name=trait_name,
            w_file=config.w_file,
            sample_name=config.sample_name,
            num_processes=config.max_processes,
            ldscore_save_format='quick_mode',
            snp_gene_weight_adata_path=config.snp_gene_weight_adata_path,
        )
        run_spatial_ldsc(spatial_ldsc_config_trait)
    end_time = time.time()
    logger.info(f"Step 4 completed in {format_duration(end_time - start_time)}.")

    # Step 5: Cauchy combination test
    start_time = time.time()
    logger.info("Step 5: Running Cauchy combination test")
    for trait_name in sumstats_config:
        # Skip traits whose Cauchy result file is already present
        cauchy_result_file = config.get_cauchy_result_file(trait_name)
        if cauchy_result_file.exists():
            logger.info(
                f"Cauchy combination already done for trait {trait_name}. Results saved at {cauchy_result_file}. Skipping...")
            continue
        cauchy_config = CauchyCombinationConfig(
            workdir=config.workdir,
            sample_name=config.sample_name,
            annotation=config.annotation,
            trait_name=trait_name,
        )
        run_Cauchy_combination(cauchy_config)
    end_time = time.time()
    logger.info(f"Step 5 completed in {format_duration(end_time - start_time)}.")

    # Step 6: Generate final report
    for trait_name in sumstats_config:
        logger.info("Running final report generation for trait: %s", trait_name)
        report_config = ReportConfig(
            workdir=config.workdir,
            sample_name=config.sample_name,
            annotation=config.annotation,
            trait_name=trait_name,
            plot_type='all',
            top_corr_genes=50,
            selected_genes=None,
            sumstats_file=sumstats_config[trait_name],
        )
        # Run parameters shown in the report for this trait
        run_parameter_dict = {
            "Sample Name": config.sample_name,
            "Trait Name": trait_name,
            "Summary Statistics File": sumstats_config[trait_name],
            "HDF5 Path": config.hdf5_path,
            "Annotation": config.annotation,
            "Number of Processes": config.max_processes,
            "Spatial LDSC Save Directory": config.ldsc_save_dir,
            "Cauchy Directory": config.cauchy_save_dir,
            "Report Directory": config.get_report_dir(trait_name),
            "gsMap Report File": config.get_gsMap_report_file(trait_name),
            "Gene Diagnostic Info File": config.get_gene_diagnostic_info_save_path(trait_name),
            # Elapsed time since the pipeline steps started, up to this report
            "Spending Time": format_duration(time.time() - pipeline_start_time),
        }

        run_report(report_config, run_parameters=run_parameter_dict)

    logger.info("Pipeline completed successfully.")
|