gsMap 1.71.1__py3-none-any.whl → 1.72.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gsMap/run_all_mode.py CHANGED
@@ -1,195 +1,235 @@
1
- import logging
2
- import time
3
- from pathlib import Path
4
-
5
- from gsMap.cauchy_combination_test import run_Cauchy_combination
6
- from gsMap.config import GenerateLDScoreConfig, SpatialLDSCConfig, LatentToGeneConfig, \
7
- FindLatentRepresentationsConfig, CauchyCombinationConfig, RunAllModeConfig, ReportConfig
8
- from gsMap.find_latent_representation import run_find_latent_representation
9
- from gsMap.generate_ldscore import run_generate_ldscore
10
- from gsMap.latent_to_gene import run_latent_to_gene
11
- from gsMap.report import run_report
12
- from gsMap.spatial_ldsc_multiple_sumstats import run_spatial_ldsc
13
-
14
-
15
-
16
- def format_duration(seconds):
17
- hours = int(seconds // 3600)
18
- minutes = int((seconds % 3600) // 60)
19
- return f"{hours}h {minutes}m"
20
-
21
-
22
- def run_pipeline(config: RunAllModeConfig):
23
- # # Set up logging
24
- log_file = Path(config.workdir) / config.sample_name / 'gsMap_pipeline.log'
25
- log_file.parent.mkdir(parents=True, exist_ok=True)
26
- logging.basicConfig(
27
- level=logging.INFO,
28
- format='[{asctime}] {levelname:.5s} | {name} - {message}',
29
- handlers=[
30
- logging.FileHandler(log_file),
31
- ],
32
- style='{'
33
- )
34
-
35
- logger = logging.getLogger('gsMap.pipeline')
36
- logger.info("Starting pipeline with configuration: %s", config)
37
-
38
- find_latent_config = FindLatentRepresentationsConfig(
39
- workdir=config.workdir,
40
- input_hdf5_path=config.hdf5_path,
41
- sample_name=config.sample_name,
42
- annotation=config.annotation,
43
- data_layer=config.data_layer
44
- )
45
-
46
- latent_to_gene_config = LatentToGeneConfig(
47
- workdir=config.workdir,
48
- sample_name=config.sample_name,
49
- annotation=config.annotation,
50
- latent_representation='latent_GVAE',
51
- num_neighbour=51,
52
- num_neighbour_spatial=201,
53
- homolog_file=config.homolog_file
54
- )
55
-
56
- ldscore_config = GenerateLDScoreConfig(
57
- workdir=config.workdir,
58
- sample_name=config.sample_name,
59
- chrom='all',
60
- # ldscore_save_dir=f"{config.workdir}/{config.sample_name}/generate_ldscore",
61
- # mkscore_feather_file=latent_to_gene_config.output_feather_path,
62
- bfile_root=config.bfile_root,
63
- keep_snp_root=config.keep_snp_root,
64
- gtf_annotation_file=config.gtffile,
65
- spots_per_chunk=5_000,
66
- baseline_annotation_dir=config.baseline_annotation_dir,
67
- SNP_gene_pair_dir=config.SNP_gene_pair_dir,
68
- ldscore_save_format='quick_mode'
69
-
70
- )
71
-
72
- pipeline_start_time = time.time()
73
-
74
- # Step 1: Find latent representations
75
- start_time = time.time()
76
- logger.info("Step 1: Finding latent representations")
77
- if Path(find_latent_config.hdf5_with_latent_path).exists():
78
- logger.info(
79
- f"Find latent representations already done. Results saved at {find_latent_config.hdf5_with_latent_path}. Skipping...")
80
- else:
81
- run_find_latent_representation(find_latent_config)
82
- end_time = time.time()
83
- logger.info(f"Step 1 completed in {format_duration(end_time - start_time)}.")
84
-
85
- # Step 2: Latent to gene
86
- start_time = time.time()
87
- logger.info("Step 2: Mapping latent representations to genes")
88
- if Path(latent_to_gene_config.mkscore_feather_path).exists():
89
- logger.info(
90
- f"Latent to gene mapping already done. Results saved at {latent_to_gene_config.mkscore_feather_path}. Skipping...")
91
- else:
92
- run_latent_to_gene(latent_to_gene_config)
93
- end_time = time.time()
94
- logger.info(f"Step 2 completed in {format_duration(end_time - start_time)}.")
95
-
96
- # Step 3: Generate LDScores
97
- start_time = time.time()
98
- logger.info("Step 3: Generating LDScores")
99
-
100
- # check if LDscore has been generated by the done file
101
- ldsc_done_file = Path(ldscore_config.ldscore_save_dir) / f"{config.sample_name}_generate_ldscore.done"
102
- if ldsc_done_file.exists():
103
- logger.info(f"Basic LDScore generation already done. Results saved at {ldscore_config.ldscore_save_dir}. Skipping...")
104
- else:
105
- run_generate_ldscore(ldscore_config)
106
- end_time = time.time()
107
- logger.info(f"Step 3 completed in {format_duration(end_time - start_time)}.")
108
- # create a done file
109
- ldsc_done_file.touch()
110
-
111
- # Step 4: Spatial LDSC
112
- start_time = time.time()
113
- logger.info("Step 4: Running spatial LDSC")
114
-
115
- sumstats_config = config.sumstats_config_dict
116
- for trait_name in sumstats_config:
117
- logger.info("Running spatial LDSC for trait: %s", trait_name)
118
- # detect if the spatial LDSC has been done:
119
- spatial_ldsc_result_file = Path(config.ldsc_save_dir) / f"{config.sample_name}_{trait_name}.csv.gz"
120
-
121
- if spatial_ldsc_result_file.exists():
122
- logger.info(
123
- f"Spatial LDSC already done for trait {trait_name}. Results saved at {spatial_ldsc_result_file}. Skipping...")
124
- continue
125
-
126
- spatial_ldsc_config_trait = SpatialLDSCConfig(
127
- workdir=config.workdir,
128
- sumstats_file=sumstats_config[trait_name],
129
- trait_name=trait_name,
130
- w_file=config.w_file,
131
- sample_name=config.sample_name,
132
- # ldscore_save_dir=spatial_ldsc_config.ldscore_save_dir,
133
- # ldsc_save_dir=spatial_ldsc_config.ldsc_save_dir,
134
- num_processes=config.max_processes,
135
- ldscore_save_format='quick_mode',
136
- snp_gene_weight_adata_path=config.snp_gene_weight_adata_path,
137
- )
138
- run_spatial_ldsc(spatial_ldsc_config_trait)
139
- end_time = time.time()
140
- logger.info(f"Step 4 completed in {format_duration(end_time - start_time)}.")
141
-
142
- # Step 5: Cauchy combination test
143
- start_time = time.time()
144
- logger.info("Step 6: Running Cauchy combination test")
145
- '/storage/yangjianLab/chenwenhao/projects/202312_GPS/test/20240817_vanilla_pipeline_mouse_embryo_v4/E16.5_E1S1.MOSTA/cauchy_combination/E16.5_E1S1.MOSTA_Depression_2023_NatureMed.Cauchy.csv.gz'
146
- for trait_name in sumstats_config:
147
- # check if the cauchy combination has been done
148
- cauchy_result_file = config.get_cauchy_result_file(trait_name)
149
- if cauchy_result_file.exists():
150
- logger.info(
151
- f"Cauchy combination already done for trait {trait_name}. Results saved at {cauchy_result_file}. Skipping...")
152
- continue
153
- cauchy_config = CauchyCombinationConfig(
154
- workdir=config.workdir,
155
- sample_name=config.sample_name,
156
- annotation=config.annotation,
157
- trait_name=trait_name,
158
- )
159
- run_Cauchy_combination(cauchy_config)
160
- end_time = time.time()
161
- logger.info(f"Step 5 completed in {format_duration(end_time - start_time)}.")
162
-
163
- # Step 6: Generate final report
164
- for trait_name in sumstats_config:
165
- logger.info("Running final report generation for trait: %s", trait_name)
166
- report_config = ReportConfig(
167
- workdir=config.workdir,
168
- sample_name=config.sample_name,
169
- annotation=config.annotation,
170
- trait_name=trait_name,
171
- plot_type='all',
172
- top_corr_genes=50,
173
- selected_genes=None,
174
- sumstats_file=sumstats_config[trait_name],
175
- )
176
- # Create the run parameters dictionary for each trait
177
- run_parameter_dict = {
178
- "Sample Name": config.sample_name,
179
- "Trait Name": trait_name,
180
- "Summary Statistics File": sumstats_config[trait_name],
181
- "HDF5 Path": config.hdf5_path,
182
- "Annotation": config.annotation,
183
- "Number of Processes": config.max_processes,
184
- "Spatial LDSC Save Directory": config.ldsc_save_dir,
185
- "Cauchy Directory": config.cauchy_save_dir,
186
- "Report Directory": config.get_report_dir(trait_name),
187
- "gsMap Report File": config.get_gsMap_report_file(trait_name),
188
- "Gene Diagnostic Info File": config.get_gene_diagnostic_info_save_path(trait_name),
189
- "Spending Time": format_duration(time.time() - pipeline_start_time),
190
- }
191
-
192
- # Pass the run parameter dictionary to the report generation function
193
- run_report(report_config, run_parameters=run_parameter_dict)
194
-
195
- logger.info("Pipeline completed successfully.")
1
+ import logging
2
+ import time
3
+ from pathlib import Path
4
+
5
+ from gsMap.cauchy_combination_test import run_Cauchy_combination
6
+ from gsMap.config import (
7
+ CauchyCombinationConfig,
8
+ FindLatentRepresentationsConfig,
9
+ GenerateLDScoreConfig,
10
+ LatentToGeneConfig,
11
+ ReportConfig,
12
+ RunAllModeConfig,
13
+ SpatialLDSCConfig,
14
+ )
15
+ from gsMap.find_latent_representation import run_find_latent_representation
16
+ from gsMap.generate_ldscore import run_generate_ldscore
17
+ from gsMap.latent_to_gene import run_latent_to_gene
18
+ from gsMap.report import run_report
19
+ from gsMap.spatial_ldsc_multiple_sumstats import run_spatial_ldsc
20
+
21
+
22
+ def format_duration(seconds):
23
+ hours = int(seconds // 3600)
24
+ minutes = int((seconds % 3600) // 60)
25
+ return f"{hours}h {minutes}m"
26
+
27
+
28
+ def run_pipeline(config: RunAllModeConfig):
29
+ # # Set up logging
30
+ _current_datatime = time.strftime("%Y%m%d_%H%M%S")
31
+ log_file = (
32
+ Path(config.workdir)
33
+ / config.sample_name
34
+ / f"gsMap_pipeline_{config.sample_name}_{_current_datatime}.log"
35
+ )
36
+ log_file.parent.mkdir(parents=True, exist_ok=True)
37
+ logging.basicConfig(
38
+ level=logging.INFO,
39
+ format="[{asctime}] {levelname:.5s} | {name} - {message}",
40
+ handlers=[
41
+ logging.FileHandler(log_file),
42
+ ],
43
+ style="{",
44
+ )
45
+
46
+ logger = logging.getLogger("gsMap.pipeline")
47
+ logger.info("Starting pipeline with configuration: %s", config)
48
+ pipeline_start_time = time.time()
49
+
50
+ # Step 1: Find latent representations
51
+ if config.latent_representation is not None:
52
+ logger.warning(
53
+ f"Using the provided latent representation: {config.latent_representation} in {config.hdf5_path}. This would skip the Find_latent_representations step."
54
+ )
55
+ logger.info(
56
+ "Skipping step 1: Find latent representations, as latent representation is provided."
57
+ )
58
+ latent_to_gene_input_hdf5_path = config.hdf5_path
59
+ else:
60
+ latent_to_gene_input_hdf5_path = None
61
+ logger.info(
62
+ "No latent representation provided. Will run the Find_latent_representations step."
63
+ )
64
+ find_latent_config = FindLatentRepresentationsConfig(
65
+ workdir=config.workdir,
66
+ input_hdf5_path=config.hdf5_path,
67
+ sample_name=config.sample_name,
68
+ annotation=config.annotation,
69
+ data_layer=config.data_layer,
70
+ )
71
+
72
+ # Step 1: Find latent representations
73
+ start_time = time.time()
74
+
75
+ logger.info("Step 1: Finding latent representations")
76
+ if Path(find_latent_config.hdf5_with_latent_path).exists():
77
+ logger.info(
78
+ f"Find latent representations already done. Results saved at {find_latent_config.hdf5_with_latent_path}. Skipping..."
79
+ )
80
+ else:
81
+ run_find_latent_representation(find_latent_config)
82
+ end_time = time.time()
83
+ logger.info(f"Step 1 completed in {format_duration(end_time - start_time)}.")
84
+
85
+ latent_to_gene_config = LatentToGeneConfig(
86
+ input_hdf5_path=latent_to_gene_input_hdf5_path,
87
+ workdir=config.workdir,
88
+ sample_name=config.sample_name,
89
+ annotation=config.annotation,
90
+ latent_representation=config.latent_representation,
91
+ num_neighbour=config.num_neighbour,
92
+ num_neighbour_spatial=config.num_neighbour_spatial,
93
+ homolog_file=config.homolog_file,
94
+ gM_slices=config.gM_slices,
95
+ )
96
+
97
+ ldscore_config = GenerateLDScoreConfig(
98
+ workdir=config.workdir,
99
+ sample_name=config.sample_name,
100
+ chrom="all",
101
+ bfile_root=config.bfile_root,
102
+ keep_snp_root=config.keep_snp_root,
103
+ gtf_annotation_file=config.gtffile,
104
+ spots_per_chunk=5_000,
105
+ baseline_annotation_dir=config.baseline_annotation_dir,
106
+ SNP_gene_pair_dir=config.SNP_gene_pair_dir,
107
+ ldscore_save_format="quick_mode",
108
+ )
109
+
110
+ # Step 2: Latent to gene
111
+ start_time = time.time()
112
+ logger.info("Step 2: Mapping latent representations to genes")
113
+ if Path(latent_to_gene_config.mkscore_feather_path).exists():
114
+ logger.info(
115
+ f"Latent to gene mapping already done. Results saved at {latent_to_gene_config.mkscore_feather_path}. Skipping..."
116
+ )
117
+ else:
118
+ run_latent_to_gene(latent_to_gene_config)
119
+ end_time = time.time()
120
+ logger.info(f"Step 2 completed in {format_duration(end_time - start_time)}.")
121
+
122
+ # Step 3: Generate LDScores
123
+ start_time = time.time()
124
+ logger.info("Step 3: Generating LDScores")
125
+
126
+ # check if LDscore has been generated by the done file
127
+ ldsc_done_file = (
128
+ Path(ldscore_config.ldscore_save_dir) / f"{config.sample_name}_generate_ldscore.done"
129
+ )
130
+ if ldsc_done_file.exists():
131
+ logger.info(
132
+ f"Basic LDScore generation already done. Results saved at {ldscore_config.ldscore_save_dir}. Skipping..."
133
+ )
134
+ else:
135
+ run_generate_ldscore(ldscore_config)
136
+ end_time = time.time()
137
+ logger.info(f"Step 3 completed in {format_duration(end_time - start_time)}.")
138
+ # create a done file
139
+ ldsc_done_file.touch()
140
+
141
+ # Step 4: Spatial LDSC
142
+ start_time = time.time()
143
+ logger.info("Step 4: Running spatial LDSC")
144
+
145
+ sumstats_config = config.sumstats_config_dict
146
+ for trait_name in sumstats_config:
147
+ logger.info("Running spatial LDSC for trait: %s", trait_name)
148
+ # detect if the spatial LDSC has been done:
149
+ spatial_ldsc_result_file = (
150
+ Path(config.ldsc_save_dir) / f"{config.sample_name}_{trait_name}.csv.gz"
151
+ )
152
+
153
+ if spatial_ldsc_result_file.exists():
154
+ logger.info(
155
+ f"Spatial LDSC already done for trait {trait_name}. Results saved at {spatial_ldsc_result_file}. Skipping..."
156
+ )
157
+ continue
158
+
159
+ spatial_ldsc_config_trait = SpatialLDSCConfig(
160
+ workdir=config.workdir,
161
+ sumstats_file=sumstats_config[trait_name],
162
+ trait_name=trait_name,
163
+ w_file=config.w_file,
164
+ sample_name=config.sample_name,
165
+ # ldscore_save_dir=spatial_ldsc_config.ldscore_save_dir,
166
+ # ldsc_save_dir=spatial_ldsc_config.ldsc_save_dir,
167
+ num_processes=config.max_processes,
168
+ ldscore_save_format="quick_mode",
169
+ snp_gene_weight_adata_path=config.snp_gene_weight_adata_path,
170
+ )
171
+ run_spatial_ldsc(spatial_ldsc_config_trait)
172
+ end_time = time.time()
173
+ logger.info(f"Step 4 completed in {format_duration(end_time - start_time)}.")
174
+
175
+ # Step 5: Cauchy combination test
176
+ start_time = time.time()
177
+ logger.info("Step 6: Running Cauchy combination test")
178
+ for trait_name in sumstats_config:
179
+ # check if the cauchy combination has been done
180
+ cauchy_result_file = config.get_cauchy_result_file(trait_name)
181
+ if cauchy_result_file.exists():
182
+ logger.info(
183
+ f"Cauchy combination already done for trait {trait_name}. Results saved at {cauchy_result_file}. Skipping..."
184
+ )
185
+ continue
186
+ cauchy_config = CauchyCombinationConfig(
187
+ workdir=config.workdir,
188
+ sample_name=config.sample_name,
189
+ annotation=config.annotation,
190
+ trait_name=trait_name,
191
+ )
192
+ run_Cauchy_combination(cauchy_config)
193
+ end_time = time.time()
194
+ logger.info(f"Step 5 completed in {format_duration(end_time - start_time)}.")
195
+
196
+ # Step 6: Generate final report
197
+ for trait_name in sumstats_config:
198
+ logger.info("Running final report generation for trait: %s", trait_name)
199
+ report_config = ReportConfig(
200
+ workdir=config.workdir,
201
+ sample_name=config.sample_name,
202
+ annotation=config.annotation,
203
+ trait_name=trait_name,
204
+ plot_type="all",
205
+ top_corr_genes=50,
206
+ selected_genes=None,
207
+ sumstats_file=sumstats_config[trait_name],
208
+ )
209
+ gsMap_report_file = report_config.get_gsMap_report_file(trait_name)
210
+ if Path(gsMap_report_file).exists():
211
+ logger.info(
212
+ f"Final report already generated for trait {trait_name}. Results saved at {gsMap_report_file}. Skipping..."
213
+ )
214
+ continue
215
+
216
+ # Create the run parameters dictionary for each trait
217
+ run_parameter_dict = {
218
+ "Sample Name": config.sample_name,
219
+ "Trait Name": trait_name,
220
+ "Summary Statistics File": sumstats_config[trait_name],
221
+ "HDF5 Path": config.hdf5_path,
222
+ "Annotation": config.annotation,
223
+ "Number of Processes": config.max_processes,
224
+ "Spatial LDSC Save Directory": config.ldsc_save_dir,
225
+ "Cauchy Directory": config.cauchy_save_dir,
226
+ "Report Directory": config.get_report_dir(trait_name),
227
+ "gsMap Report File": config.get_gsMap_report_file(trait_name),
228
+ "Gene Diagnostic Info File": config.get_gene_diagnostic_info_save_path(trait_name),
229
+ "Spending Time": format_duration(time.time() - pipeline_start_time),
230
+ }
231
+
232
+ # Pass the run parameter dictionary to the report generation function
233
+ run_report(report_config, run_parameters=run_parameter_dict)
234
+
235
+ logger.info("Pipeline completed successfully.")
gsMap/setup.py CHANGED
@@ -2,4 +2,4 @@
2
2
  import setuptools
3
3
 
4
4
  if __name__ == "__main__":
5
- setuptools.setup(name='gsMap')
5
+ setuptools.setup(name="gsMap")