gsMap 1.67__py3-none-any.whl → 1.71__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gsMap/run_all_mode.py CHANGED
@@ -1,195 +1,195 @@
1
- import logging
2
- import time
3
- from pathlib import Path
4
-
5
- from gsMap.cauchy_combination_test import run_Cauchy_combination
6
- from gsMap.config import GenerateLDScoreConfig, SpatialLDSCConfig, LatentToGeneConfig, \
7
- FindLatentRepresentationsConfig, CauchyCombinationConfig, RunAllModeConfig, ReportConfig
8
- from gsMap.find_latent_representation import run_find_latent_representation
9
- from gsMap.generate_ldscore import run_generate_ldscore
10
- from gsMap.latent_to_gene import run_latent_to_gene
11
- from gsMap.report import run_report
12
- from gsMap.spatial_ldsc_multiple_sumstats import run_spatial_ldsc
13
-
14
-
15
-
16
- def format_duration(seconds):
17
- hours = int(seconds // 3600)
18
- minutes = int((seconds % 3600) // 60)
19
- return f"{hours}h {minutes}m"
20
-
21
-
22
- def run_pipeline(config: RunAllModeConfig):
23
- # # Set up logging
24
- log_file = Path(config.workdir) / config.sample_name / 'gsMap_pipeline.log'
25
- log_file.parent.mkdir(parents=True, exist_ok=True)
26
- logging.basicConfig(
27
- level=logging.INFO,
28
- format='[{asctime}] {levelname:.5s} | {name} - {message}',
29
- handlers=[
30
- logging.FileHandler(log_file),
31
- ],
32
- style='{'
33
- )
34
-
35
- logger = logging.getLogger('gsMap.pipeline')
36
- logger.info("Starting pipeline with configuration: %s", config)
37
-
38
- find_latent_config = FindLatentRepresentationsConfig(
39
- workdir=config.workdir,
40
- input_hdf5_path=config.hdf5_path,
41
- sample_name=config.sample_name,
42
- annotation=config.annotation,
43
- data_layer=config.data_layer
44
- )
45
-
46
- latent_to_gene_config = LatentToGeneConfig(
47
- workdir=config.workdir,
48
- sample_name=config.sample_name,
49
- annotation=config.annotation,
50
- latent_representation='latent_GVAE',
51
- num_neighbour=51,
52
- num_neighbour_spatial=201,
53
- homolog_file=config.homolog_file
54
- )
55
-
56
- ldscore_config = GenerateLDScoreConfig(
57
- workdir=config.workdir,
58
- sample_name=config.sample_name,
59
- chrom='all',
60
- # ldscore_save_dir=f"{config.workdir}/{config.sample_name}/generate_ldscore",
61
- # mkscore_feather_file=latent_to_gene_config.output_feather_path,
62
- bfile_root=config.bfile_root,
63
- keep_snp_root=config.keep_snp_root,
64
- gtf_annotation_file=config.gtffile,
65
- spots_per_chunk=5_000,
66
- baseline_annotation_dir=config.baseline_annotation_dir,
67
- SNP_gene_pair_dir=config.SNP_gene_pair_dir,
68
- ldscore_save_format='quick_mode'
69
-
70
- )
71
-
72
- pipeline_start_time = time.time()
73
-
74
- # Step 1: Find latent representations
75
- start_time = time.time()
76
- logger.info("Step 1: Finding latent representations")
77
- if Path(find_latent_config.hdf5_with_latent_path).exists():
78
- logger.info(
79
- f"Find latent representations already done. Results saved at {find_latent_config.hdf5_with_latent_path}. Skipping...")
80
- else:
81
- run_find_latent_representation(find_latent_config)
82
- end_time = time.time()
83
- logger.info(f"Step 1 completed in {format_duration(end_time - start_time)}.")
84
-
85
- # Step 2: Latent to gene
86
- start_time = time.time()
87
- logger.info("Step 2: Mapping latent representations to genes")
88
- if Path(latent_to_gene_config.mkscore_feather_path).exists():
89
- logger.info(
90
- f"Latent to gene mapping already done. Results saved at {latent_to_gene_config.mkscore_feather_path}. Skipping...")
91
- else:
92
- run_latent_to_gene(latent_to_gene_config)
93
- end_time = time.time()
94
- logger.info(f"Step 2 completed in {format_duration(end_time - start_time)}.")
95
-
96
- # Step 3: Generate LDScores
97
- start_time = time.time()
98
- logger.info("Step 3: Generating LDScores")
99
-
100
- # check if LDscore has been generated by the done file
101
- ldsc_done_file = Path(ldscore_config.ldscore_save_dir) / f"{config.sample_name}_generate_ldscore.done"
102
- if ldsc_done_file.exists():
103
- logger.info(f"Basic LDScore generation already done. Results saved at {ldscore_config.ldscore_save_dir}. Skipping...")
104
- else:
105
- run_generate_ldscore(ldscore_config)
106
- end_time = time.time()
107
- logger.info(f"Step 3 completed in {format_duration(end_time - start_time)}.")
108
- # create a done file
109
- ldsc_done_file.touch()
110
-
111
- # Step 4: Spatial LDSC
112
- start_time = time.time()
113
- logger.info("Step 4: Running spatial LDSC")
114
-
115
- sumstats_config = config.sumstats_config_dict
116
- for trait_name in sumstats_config:
117
- logger.info("Running spatial LDSC for trait: %s", trait_name)
118
- # detect if the spatial LDSC has been done:
119
- spatial_ldsc_result_file = Path(config.ldsc_save_dir) / f"{config.sample_name}_{trait_name}.csv.gz"
120
-
121
- if spatial_ldsc_result_file.exists():
122
- logger.info(
123
- f"Spatial LDSC already done for trait {trait_name}. Results saved at {spatial_ldsc_result_file}. Skipping...")
124
- continue
125
-
126
- spatial_ldsc_config_trait = SpatialLDSCConfig(
127
- workdir=config.workdir,
128
- sumstats_file=sumstats_config[trait_name],
129
- trait_name=trait_name,
130
- w_file=config.w_file,
131
- sample_name=config.sample_name,
132
- # ldscore_save_dir=spatial_ldsc_config.ldscore_save_dir,
133
- # ldsc_save_dir=spatial_ldsc_config.ldsc_save_dir,
134
- num_processes=config.max_processes,
135
- ldscore_save_format='quick_mode',
136
- snp_gene_weight_adata_path=config.snp_gene_weight_adata_path,
137
- )
138
- run_spatial_ldsc(spatial_ldsc_config_trait)
139
- end_time = time.time()
140
- logger.info(f"Step 4 completed in {format_duration(end_time - start_time)}.")
141
-
142
- # Step 5: Cauchy combination test
143
- start_time = time.time()
144
- logger.info("Step 6: Running Cauchy combination test")
145
- '/storage/yangjianLab/chenwenhao/projects/202312_GPS/test/20240817_vanilla_pipeline_mouse_embryo_v4/E16.5_E1S1.MOSTA/cauchy_combination/E16.5_E1S1.MOSTA_Depression_2023_NatureMed.Cauchy.csv.gz'
146
- for trait_name in sumstats_config:
147
- # check if the cauchy combination has been done
148
- cauchy_result_file = config.get_cauchy_result_file(trait_name)
149
- if cauchy_result_file.exists():
150
- logger.info(
151
- f"Cauchy combination already done for trait {trait_name}. Results saved at {cauchy_result_file}. Skipping...")
152
- continue
153
- cauchy_config = CauchyCombinationConfig(
154
- workdir=config.workdir,
155
- sample_name=config.sample_name,
156
- annotation=config.annotation,
157
- trait_name=trait_name,
158
- )
159
- run_Cauchy_combination(cauchy_config)
160
- end_time = time.time()
161
- logger.info(f"Step 5 completed in {format_duration(end_time - start_time)}.")
162
-
163
- # Step 6: Generate final report
164
- for trait_name in sumstats_config:
165
- logger.info("Running final report generation for trait: %s", trait_name)
166
- report_config = ReportConfig(
167
- workdir=config.workdir,
168
- sample_name=config.sample_name,
169
- annotation=config.annotation,
170
- trait_name=trait_name,
171
- plot_type='all',
172
- top_corr_genes=50,
173
- selected_genes=None,
174
- sumstats_file=sumstats_config[trait_name],
175
- )
176
- # Create the run parameters dictionary for each trait
177
- run_parameter_dict = {
178
- "Sample Name": config.sample_name,
179
- "Trait Name": trait_name,
180
- "Summary Statistics File": sumstats_config[trait_name],
181
- "HDF5 Path": config.hdf5_path,
182
- "Annotation": config.annotation,
183
- "Number of Processes": config.max_processes,
184
- "Spatial LDSC Save Directory": config.ldsc_save_dir,
185
- "Cauchy Directory": config.cauchy_save_dir,
186
- "Report Directory": config.get_report_dir(trait_name),
187
- "gsMap Report File": config.get_gsMap_report_file(trait_name),
188
- "Gene Diagnostic Info File": config.get_gene_diagnostic_info_save_path(trait_name),
189
- "Spending Time": format_duration(time.time() - pipeline_start_time),
190
- }
191
-
192
- # Pass the run parameter dictionary to the report generation function
193
- run_report(report_config, run_parameters=run_parameter_dict)
194
-
1
+ import logging
2
+ import time
3
+ from pathlib import Path
4
+
5
+ from gsMap.cauchy_combination_test import run_Cauchy_combination
6
+ from gsMap.config import GenerateLDScoreConfig, SpatialLDSCConfig, LatentToGeneConfig, \
7
+ FindLatentRepresentationsConfig, CauchyCombinationConfig, RunAllModeConfig, ReportConfig
8
+ from gsMap.find_latent_representation import run_find_latent_representation
9
+ from gsMap.generate_ldscore import run_generate_ldscore
10
+ from gsMap.latent_to_gene import run_latent_to_gene
11
+ from gsMap.report import run_report
12
+ from gsMap.spatial_ldsc_multiple_sumstats import run_spatial_ldsc
13
+
14
+
15
+
16
def format_duration(seconds):
    """Render an elapsed time given in seconds as ``"<hours>h <minutes>m"``.

    Leftover seconds are discarded rather than rounded, so any duration
    shorter than one minute is rendered as ``"0h 0m"``.
    """
    whole_hours, remainder = divmod(seconds, 3600)
    whole_minutes = remainder // 60
    return f"{int(whole_hours)}h {int(whole_minutes)}m"
20
+
21
+
22
def run_pipeline(config: RunAllModeConfig):
    """Run every gsMap step for one sample, skipping steps already finished.

    The steps run in this order:

    1. find latent representations
    2. map latent representations to genes
    3. generate LD scores
    4. spatial LDSC, once per trait
    5. Cauchy combination test, once per trait
    6. report generation, once per trait

    Steps 1-5 are skipped when their result file (for step 3, a ``.done``
    marker file) already exists, so calling this function again does not
    repeat finished work. The report of step 6 is always regenerated.

    Progress is logged to ``<workdir>/<sample_name>/gsMap_pipeline.log``.

    Args:
        config: Settings shared by all steps (working directory, sample name,
            input paths, and the trait -> summary-statistics-file mapping in
            ``config.sumstats_config_dict``).
    """
    # Set up logging to a per-sample file. NOTE: basicConfig does nothing when
    # the root logger already has handlers configured.
    log_file = Path(config.workdir) / config.sample_name / 'gsMap_pipeline.log'
    log_file.parent.mkdir(parents=True, exist_ok=True)
    logging.basicConfig(
        level=logging.INFO,
        format='[{asctime}] {levelname:.5s} | {name} - {message}',
        handlers=[
            logging.FileHandler(log_file),
        ],
        style='{',
    )

    logger = logging.getLogger('gsMap.pipeline')
    logger.info("Starting pipeline with configuration: %s", config)

    # Build the per-step configurations up front, before any step runs.
    find_latent_config = FindLatentRepresentationsConfig(
        workdir=config.workdir,
        input_hdf5_path=config.hdf5_path,
        sample_name=config.sample_name,
        annotation=config.annotation,
        data_layer=config.data_layer,
    )

    latent_to_gene_config = LatentToGeneConfig(
        workdir=config.workdir,
        sample_name=config.sample_name,
        annotation=config.annotation,
        latent_representation='latent_GVAE',
        num_neighbour=51,
        num_neighbour_spatial=201,
        homolog_file=config.homolog_file,
    )

    ldscore_config = GenerateLDScoreConfig(
        workdir=config.workdir,
        sample_name=config.sample_name,
        chrom='all',
        bfile_root=config.bfile_root,
        keep_snp_root=config.keep_snp_root,
        gtf_annotation_file=config.gtffile,
        spots_per_chunk=5_000,
        baseline_annotation_dir=config.baseline_annotation_dir,
        SNP_gene_pair_dir=config.SNP_gene_pair_dir,
        ldscore_save_format='quick_mode',
    )

    # Reference point for the total "Spending Time" shown in the reports.
    pipeline_start_time = time.time()

    # Step 1: Find latent representations
    start_time = time.time()
    logger.info("Step 1: Finding latent representations")
    if Path(find_latent_config.hdf5_with_latent_path).exists():
        logger.info(
            "Find latent representations already done. Results saved at %s. Skipping...",
            find_latent_config.hdf5_with_latent_path,
        )
    else:
        run_find_latent_representation(find_latent_config)
    end_time = time.time()
    logger.info("Step 1 completed in %s.", format_duration(end_time - start_time))

    # Step 2: Latent to gene
    start_time = time.time()
    logger.info("Step 2: Mapping latent representations to genes")
    if Path(latent_to_gene_config.mkscore_feather_path).exists():
        logger.info(
            "Latent to gene mapping already done. Results saved at %s. Skipping...",
            latent_to_gene_config.mkscore_feather_path,
        )
    else:
        run_latent_to_gene(latent_to_gene_config)
    end_time = time.time()
    logger.info("Step 2 completed in %s.", format_duration(end_time - start_time))

    # Step 3: Generate LDScores
    start_time = time.time()
    logger.info("Step 3: Generating LDScores")

    # Completion of this step is tracked by a marker file, not by its outputs.
    ldsc_done_file = Path(ldscore_config.ldscore_save_dir) / f"{config.sample_name}_generate_ldscore.done"
    if ldsc_done_file.exists():
        logger.info(
            "Basic LDScore generation already done. Results saved at %s. Skipping...",
            ldscore_config.ldscore_save_dir,
        )
    else:
        run_generate_ldscore(ldscore_config)
        end_time = time.time()
        logger.info("Step 3 completed in %s.", format_duration(end_time - start_time))
        # Create the marker only after the step returned without raising.
        ldsc_done_file.touch()

    # Step 4: Spatial LDSC
    start_time = time.time()
    logger.info("Step 4: Running spatial LDSC")

    sumstats_config = config.sumstats_config_dict
    for trait_name, sumstats_file in sumstats_config.items():
        logger.info("Running spatial LDSC for trait: %s", trait_name)
        # A trait is considered done when its result file already exists.
        spatial_ldsc_result_file = Path(config.ldsc_save_dir) / f"{config.sample_name}_{trait_name}.csv.gz"
        if spatial_ldsc_result_file.exists():
            logger.info(
                "Spatial LDSC already done for trait %s. Results saved at %s. Skipping...",
                trait_name,
                spatial_ldsc_result_file,
            )
            continue

        spatial_ldsc_config_trait = SpatialLDSCConfig(
            workdir=config.workdir,
            sumstats_file=sumstats_file,
            trait_name=trait_name,
            w_file=config.w_file,
            sample_name=config.sample_name,
            num_processes=config.max_processes,
            ldscore_save_format='quick_mode',
            snp_gene_weight_adata_path=config.snp_gene_weight_adata_path,
        )
        run_spatial_ldsc(spatial_ldsc_config_trait)
    end_time = time.time()
    logger.info("Step 4 completed in %s.", format_duration(end_time - start_time))

    # Step 5: Cauchy combination test
    start_time = time.time()
    logger.info("Step 5: Running Cauchy combination test")
    for trait_name in sumstats_config:
        # A trait is considered done when its Cauchy result file already exists.
        cauchy_result_file = config.get_cauchy_result_file(trait_name)
        if cauchy_result_file.exists():
            logger.info(
                "Cauchy combination already done for trait %s. Results saved at %s. Skipping...",
                trait_name,
                cauchy_result_file,
            )
            continue
        cauchy_config = CauchyCombinationConfig(
            workdir=config.workdir,
            sample_name=config.sample_name,
            annotation=config.annotation,
            trait_name=trait_name,
        )
        run_Cauchy_combination(cauchy_config)
    end_time = time.time()
    logger.info("Step 5 completed in %s.", format_duration(end_time - start_time))

    # Step 6: Generate final report
    start_time = time.time()
    logger.info("Step 6: Generating final report")
    for trait_name, sumstats_file in sumstats_config.items():
        logger.info("Running final report generation for trait: %s", trait_name)
        report_config = ReportConfig(
            workdir=config.workdir,
            sample_name=config.sample_name,
            annotation=config.annotation,
            trait_name=trait_name,
            plot_type='all',
            top_corr_genes=50,
            selected_genes=None,
            sumstats_file=sumstats_file,
        )
        # Run parameters passed to the report alongside its configuration.
        run_parameter_dict = {
            "Sample Name": config.sample_name,
            "Trait Name": trait_name,
            "Summary Statistics File": sumstats_file,
            "HDF5 Path": config.hdf5_path,
            "Annotation": config.annotation,
            "Number of Processes": config.max_processes,
            "Spatial LDSC Save Directory": config.ldsc_save_dir,
            "Cauchy Directory": config.cauchy_save_dir,
            "Report Directory": config.get_report_dir(trait_name),
            "gsMap Report File": config.get_gsMap_report_file(trait_name),
            "Gene Diagnostic Info File": config.get_gene_diagnostic_info_save_path(trait_name),
            "Spending Time": format_duration(time.time() - pipeline_start_time),
        }

        run_report(report_config, run_parameters=run_parameter_dict)
    end_time = time.time()
    logger.info("Step 6 completed in %s.", format_duration(end_time - start_time))

    logger.info("Pipeline completed successfully.")
gsMap/setup.py CHANGED
File without changes