gsMap 1.62__py3-none-any.whl → 1.63__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gsMap/main.py CHANGED
@@ -1,14 +1,6 @@
1
1
  from gsMap import (__version__)
2
2
  from gsMap.config import *
3
3
 
4
- logger = logging.getLogger(__name__)
5
- logger.setLevel(logging.DEBUG)
6
- handler = logging.StreamHandler()
7
- handler.setFormatter(logging.Formatter(
8
- '[{asctime}] {levelname:8s} {filename} {message}', style='{'))
9
- logger.addHandler(handler)
10
-
11
-
12
4
  def main():
13
5
  parser = create_parser()
14
6
  args = parser.parse_args()
@@ -20,7 +12,7 @@ def main():
20
12
  )
21
13
 
22
14
  def create_parser():
23
- parser = argparse.ArgumentParser(description=" gsMap: Genetics-informed pathogenic spatial mapping",
15
+ parser = argparse.ArgumentParser(description=" gsMap: genetically informed spatial mapping of cells for complex traits",
24
16
  formatter_class=argparse.RawTextHelpFormatter,
25
17
  prog='gsMap'
26
18
  )
gsMap/report.py ADDED
@@ -0,0 +1,160 @@
1
+ import logging
2
+ import os
3
+ import shutil
4
+
5
+ import pandas as pd
6
+ from jinja2 import Environment, FileSystemLoader
7
+
8
+ import gsMap
9
+ from gsMap.cauchy_combination_test import run_Cauchy_combination
10
+ from gsMap.config import CauchyCombinationConfig, ReportConfig
11
+ from gsMap.diagnosis import run_Diagnosis
12
+
13
logger = logging.getLogger(__name__)

# Resolve the directory that holds the report templates. The packaged
# ``templates`` folder of ``gsMap`` is looked up through importlib.resources.
try:
    from importlib.resources import files

    template_dir = files('gsMap').joinpath('templates')
except (ImportError, FileNotFoundError):
    # The import or the lookup failed: use the ``templates`` folder that sits
    # next to this module instead.
    template_dir = os.path.join(os.path.dirname(__file__), 'templates')

# The Jinja2 environment and the report template are created once, at import time.
env = Environment(loader=FileSystemLoader(template_dir))
template = env.get_template('report_template.html')
29
+
30
def copy_files_to_report_dir(result_dir, report_dir, files_to_copy):
    """Copy every path in ``files_to_copy`` into ``report_dir`` under its base name.

    ``report_dir`` is created when it does not exist yet. ``result_dir`` is
    accepted for the caller's convenience but is not used by this function.
    """
    os.makedirs(report_dir, exist_ok=True)
    for source_path in files_to_copy:
        target_path = os.path.join(report_dir, os.path.basename(source_path))
        shutil.copy2(source_path, target_path)
35
+
36
+
37
def load_cauchy_table(csv_file):
    """Read a gzip-compressed Cauchy combination CSV and return its rows as dictionaries.

    Only the ``annotation``, ``p_cauchy`` and ``p_median`` columns are kept;
    each row becomes one ``{column: value}`` dictionary.
    """
    wanted_columns = ['annotation', 'p_cauchy', 'p_median']
    cauchy_df = pd.read_csv(csv_file, compression='gzip')
    return cauchy_df[wanted_columns].to_dict(orient='records')
42
+
43
+
44
def load_gene_diagnostic_info(csv_file):
    """Read the gene diagnostic info CSV and return its first 50 rows as dictionaries."""
    diagnostic_df = pd.read_csv(csv_file)
    leading_rows = diagnostic_df.iloc[:50]
    return leading_rows.to_dict(orient='records')
49
+
50
+
51
def embed_html_content(file_path):
    """Return the full text of the HTML file at ``file_path``.

    The file is decoded as UTF-8 explicitly. Without an ``encoding`` argument
    ``open`` falls back to the platform's locale encoding, which makes the
    result (or a ``UnicodeDecodeError``) depend on the machine running the
    report.
    NOTE(review): assumes the embedded plot files are written as UTF-8 -
    confirm against the code that produces them.
    """
    with open(file_path, 'r', encoding='utf-8') as html_file:
        return html_file.read()
55
+
56
def check_and_run_cauchy_combination(config):
    """Return the Cauchy combination table for ``config.trait_name``, running the test first if needed.

    When the file returned by ``config.get_cauchy_result_file`` already exists
    it is reused. Otherwise a ``CauchyCombinationConfig`` is built from the
    ``workdir``, ``sample_name``, ``annotation`` and ``trait_name`` of
    ``config`` and ``run_Cauchy_combination`` is executed.
    NOTE(review): assumes ``run_Cauchy_combination`` writes its output to that
    same result file - confirm.

    Returns:
        A list of row dictionaries with the keys ``annotation``, ``p_cauchy``
        and ``p_median``.
    """
    cauchy_result_file = config.get_cauchy_result_file(config.trait_name)
    if cauchy_result_file.exists():
        logger.info(
            f"Cauchy combination already done for trait {config.trait_name}. Results saved at {cauchy_result_file}. Skipping...")
    else:
        logger.info(f"Running Cauchy combination for trait {config.trait_name}...")
        cauchy_config = CauchyCombinationConfig(
            workdir=config.workdir,
            sample_name=config.sample_name,
            annotation=config.annotation,
            trait_name=config.trait_name,
        )
        run_Cauchy_combination(cauchy_config)

    # Use the module's shared loader rather than repeating its read and
    # column-selection logic here.
    return load_cauchy_table(cauchy_result_file)
75
+
76
def _collect_gene_plots(config, report_dir):
    """Return one template entry per selected gene whose expression and GSS plots both exist."""
    gss_distribution_dir = config.get_GSS_plot_dir(config.trait_name)
    plot_select_gene_list = config.get_GSS_plot_select_gene_file(config.trait_name).read_text().splitlines()

    gene_plots = []
    for gene_name in plot_select_gene_list:
        expression_png = gss_distribution_dir / f"{config.sample_name}_{gene_name}_Expression_Distribution.png"
        gss_png = gss_distribution_dir / f"{config.sample_name}_{gene_name}_GSS_Distribution.png"
        if not os.path.exists(expression_png) or not os.path.exists(gss_png):
            # Report through the logger (not print) so the notice reaches the
            # same destination as every other message of this module.
            logger.warning("Skipping gene %s as expression or GSS plot is missing.", gene_name)
            continue
        gene_plots.append({
            'name': gene_name,
            # Paths are stored relative to the report directory.
            'expression_plot': expression_png.relative_to(report_dir),
            'gss_plot': gss_png.relative_to(report_dir),
        })
    return gene_plots


def run_report(config: ReportConfig, run_parameters=None):
    """Run the diagnosis module and render the HTML report for ``config.trait_name``.

    Steps, in order: run ``run_Diagnosis``, load the first rows of the gene
    diagnostic info, obtain the Cauchy combination table (running the test if
    its result file is missing), collect the per-gene plot paths, read the
    genetic-mapping and Manhattan HTML plots, render ``template`` and write
    the result to ``config.get_gsMap_report_file(config.trait_name)``.

    Args:
        config: Report configuration providing the sample/trait names and the
            path getters used above.
        run_parameters: Optional mapping whose entries are merged over the
            default run parameters shown in the report; keys present in both
            take the value from ``run_parameters``.
    """
    logger.info('Running gsMap Diagnosis Module')
    run_Diagnosis(config)
    logger.info('gsMap Diagnosis running successfully')

    report_dir = config.get_report_dir(config.trait_name)
    gene_diagnostic_info_file = config.get_gene_diagnostic_info_save_path(config.trait_name)
    gene_diagnostic_info = load_gene_diagnostic_info(gene_diagnostic_info_file)

    cauchy_table = check_and_run_cauchy_combination(config)

    gene_plots = _collect_gene_plots(config, report_dir)

    title = f"{config.sample_name} Genetic Spatial Mapping Report"

    # The two interactive plots are inlined into the report as raw HTML text.
    genetic_mapping_plot = embed_html_content(config.get_gsMap_html_plot_save_path(config.trait_name))
    manhattan_plot = embed_html_content(config.get_manhattan_html_plot_path(config.trait_name))

    gsmap_version = gsMap.__version__

    trait_name = config.trait_name
    default_run_parameters = {
        "Sample Name": config.sample_name,
        "Trait Name": trait_name,
        "Summary Statistics File": config.sumstats_file,
        "HDF5 Path": config.hdf5_with_latent_path,
        "Annotation": config.annotation,
        "Spatial LDSC Save Directory": config.ldsc_save_dir,
        "Cauchy Directory": config.cauchy_save_dir,
        "Report Directory": config.get_report_dir(trait_name),
        "gsMap Report File": config.get_gsMap_report_file(trait_name),
        "Gene Diagnostic Info File": config.get_gene_diagnostic_info_save_path(trait_name),
        "Report Generation Date": pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S'),
    }

    # Caller-supplied parameters override the defaults key by key.
    if run_parameters is not None:
        default_run_parameters.update(run_parameters)

    output_html = template.render(
        title=title,
        genetic_mapping_plot=genetic_mapping_plot,
        manhattan_plot=manhattan_plot,
        cauchy_table=cauchy_table,
        gene_plots=gene_plots,
        gsmap_version=gsmap_version,
        parameters=default_run_parameters,
        gene_diagnostic_info=gene_diagnostic_info,
    )

    report_file = config.get_gsMap_report_file(config.trait_name)
    with open(report_file, "w") as f:
        f.write(output_html)

    logger.info(f"Report generated successfully! Saved at {report_file}.")
    logger.info("Copy the report directory to your local PC and open the HTML report file in a web browser to view the report.")
gsMap/run_all_mode.py ADDED
@@ -0,0 +1,195 @@
1
+ import logging
2
+ import time
3
+ from pathlib import Path
4
+
5
+ from gsMap.cauchy_combination_test import run_Cauchy_combination
6
+ from gsMap.config import GenerateLDScoreConfig, SpatialLDSCConfig, LatentToGeneConfig, \
7
+ FindLatentRepresentationsConfig, CauchyCombinationConfig, RunAllModeConfig, ReportConfig
8
+ from gsMap.find_latent_representation import run_find_latent_representation
9
+ from gsMap.generate_ldscore import run_generate_ldscore
10
+ from gsMap.latent_to_gene import run_latent_to_gene
11
+ from gsMap.report import run_report
12
+ from gsMap.spatial_ldsc_multiple_sumstats import run_spatial_ldsc
13
+
14
+
15
+
16
def format_duration(seconds):
    """Format a duration given in seconds as ``"<hours>h <minutes>m"``.

    Hours and minutes are whole numbers; any remaining seconds are dropped.
    """
    whole_hours, leftover = divmod(seconds, 3600)
    whole_minutes = leftover // 60
    return f"{int(whole_hours)}h {int(whole_minutes)}m"
20
+
21
+
22
def run_pipeline(config: RunAllModeConfig):
    """Run all gsMap steps for one sample, skipping steps whose outputs already exist.

    The steps are: (1) find latent representations, (2) map latent
    representations to genes, (3) generate LD scores, (4) spatial LDSC for
    every trait in ``config.sumstats_config_dict``, (5) Cauchy combination
    test per trait and (6) report generation per trait. Steps 1-5 are skipped
    when their result file (or, for step 3, a ``.done`` marker file) is
    already present.

    Log records are written to ``<workdir>/<sample_name>/gsMap_pipeline.log``.

    Args:
        config: Run-all configuration supplying the input paths, sample name,
            annotation and the trait-name -> summary-statistics mapping.
    """
    # Route log records to a per-sample log file.
    log_file = Path(config.workdir) / config.sample_name / 'gsMap_pipeline.log'
    log_file.parent.mkdir(parents=True, exist_ok=True)
    logging.basicConfig(
        level=logging.INFO,
        format='[{asctime}] {levelname:.5s} | {name} - {message}',
        handlers=[
            logging.FileHandler(log_file),
        ],
        style='{'
    )

    logger = logging.getLogger('gsMap.pipeline')
    logger.info("Starting pipeline with configuration: %s", config)

    find_latent_config = FindLatentRepresentationsConfig(
        workdir=config.workdir,
        input_hdf5_path=config.hdf5_path,
        sample_name=config.sample_name,
        annotation=config.annotation,
        data_layer=config.data_layer
    )

    latent_to_gene_config = LatentToGeneConfig(
        workdir=config.workdir,
        sample_name=config.sample_name,
        annotation=config.annotation,
        latent_representation='latent_GVAE',
        num_neighbour=51,
        num_neighbour_spatial=201,
        homolog_file=config.homolog_file
    )

    ldscore_config = GenerateLDScoreConfig(
        workdir=config.workdir,
        sample_name=config.sample_name,
        chrom='all',
        bfile_root=config.bfile_root,
        keep_snp_root=config.keep_snp_root,
        gtf_annotation_file=config.gtffile,
        spots_per_chunk=5_000,
        baseline_annotation_dir=config.baseline_annotation_dir,
        SNP_gene_pair_dir=config.SNP_gene_pair_dir,
        ldscore_save_format='quick_mode'
    )

    pipeline_start_time = time.time()

    # Step 1: Find latent representations
    start_time = time.time()
    logger.info("Step 1: Finding latent representations")
    if Path(find_latent_config.hdf5_with_latent_path).exists():
        logger.info(
            f"Find latent representations already done. Results saved at {find_latent_config.hdf5_with_latent_path}. Skipping...")
    else:
        run_find_latent_representation(find_latent_config)
    end_time = time.time()
    logger.info(f"Step 1 completed in {format_duration(end_time - start_time)}.")

    # Step 2: Latent to gene
    start_time = time.time()
    logger.info("Step 2: Mapping latent representations to genes")
    if Path(latent_to_gene_config.mkscore_feather_path).exists():
        logger.info(
            f"Latent to gene mapping already done. Results saved at {latent_to_gene_config.mkscore_feather_path}. Skipping...")
    else:
        run_latent_to_gene(latent_to_gene_config)
    end_time = time.time()
    logger.info(f"Step 2 completed in {format_duration(end_time - start_time)}.")

    # Step 3: Generate LDScores
    start_time = time.time()
    logger.info("Step 3: Generating LDScores")

    # Completion of this step is tracked with a marker file rather than a result file.
    ldsc_done_file = Path(ldscore_config.ldscore_save_dir) / f"{config.sample_name}_generate_ldscore.done"
    if ldsc_done_file.exists():
        logger.info(f"Basic LDScore generation already done. Results saved at {ldscore_config.ldscore_save_dir}. Skipping...")
    else:
        run_generate_ldscore(ldscore_config)
        end_time = time.time()
        logger.info(f"Step 3 completed in {format_duration(end_time - start_time)}.")
        # Write the marker only after the step returned, so a failed run is retried.
        ldsc_done_file.touch()

    # Step 4: Spatial LDSC
    start_time = time.time()
    logger.info("Step 4: Running spatial LDSC")

    sumstats_config = config.sumstats_config_dict
    for trait_name in sumstats_config:
        logger.info("Running spatial LDSC for trait: %s", trait_name)
        spatial_ldsc_result_file = Path(config.ldsc_save_dir) / f"{config.sample_name}_{trait_name}.csv.gz"

        if spatial_ldsc_result_file.exists():
            logger.info(
                f"Spatial LDSC already done for trait {trait_name}. Results saved at {spatial_ldsc_result_file}. Skipping...")
            continue

        spatial_ldsc_config_trait = SpatialLDSCConfig(
            workdir=config.workdir,
            sumstats_file=sumstats_config[trait_name],
            trait_name=trait_name,
            w_file=config.w_file,
            sample_name=config.sample_name,
            num_processes=config.max_processes,
            ldscore_save_format='quick_mode',
            snp_gene_weight_adata_path=config.snp_gene_weight_adata_path,
        )
        run_spatial_ldsc(spatial_ldsc_config_trait)
    end_time = time.time()
    logger.info(f"Step 4 completed in {format_duration(end_time - start_time)}.")

    # Step 5: Cauchy combination test
    start_time = time.time()
    logger.info("Step 5: Running Cauchy combination test")
    for trait_name in sumstats_config:
        cauchy_result_file = config.get_cauchy_result_file(trait_name)
        if cauchy_result_file.exists():
            logger.info(
                f"Cauchy combination already done for trait {trait_name}. Results saved at {cauchy_result_file}. Skipping...")
            continue
        cauchy_config = CauchyCombinationConfig(
            workdir=config.workdir,
            sample_name=config.sample_name,
            annotation=config.annotation,
            trait_name=trait_name,
        )
        run_Cauchy_combination(cauchy_config)
    end_time = time.time()
    logger.info(f"Step 5 completed in {format_duration(end_time - start_time)}.")

    # Step 6: Generate final report
    for trait_name in sumstats_config:
        logger.info("Running final report generation for trait: %s", trait_name)
        report_config = ReportConfig(
            workdir=config.workdir,
            sample_name=config.sample_name,
            annotation=config.annotation,
            trait_name=trait_name,
            plot_type='all',
            top_corr_genes=50,
            selected_genes=None,
            sumstats_file=sumstats_config[trait_name],
        )
        # Parameters shown in the report for this trait; "Spending Time" is
        # the elapsed time since the pipeline started.
        run_parameter_dict = {
            "Sample Name": config.sample_name,
            "Trait Name": trait_name,
            "Summary Statistics File": sumstats_config[trait_name],
            "HDF5 Path": config.hdf5_path,
            "Annotation": config.annotation,
            "Number of Processes": config.max_processes,
            "Spatial LDSC Save Directory": config.ldsc_save_dir,
            "Cauchy Directory": config.cauchy_save_dir,
            "Report Directory": config.get_report_dir(trait_name),
            "gsMap Report File": config.get_gsMap_report_file(trait_name),
            "Gene Diagnostic Info File": config.get_gene_diagnostic_info_save_path(trait_name),
            "Spending Time": format_duration(time.time() - pipeline_start_time),
        }

        run_report(report_config, run_parameters=run_parameter_dict)

    logger.info("Pipeline completed successfully.")