gsMap 1.71.2__py3-none-any.whl → 1.72.3__py3-none-any.whl

This diff shows the changes in content between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
gsMap/run_all_mode.py CHANGED
@@ -3,8 +3,15 @@ import time
3
3
  from pathlib import Path
4
4
 
5
5
  from gsMap.cauchy_combination_test import run_Cauchy_combination
6
- from gsMap.config import GenerateLDScoreConfig, SpatialLDSCConfig, LatentToGeneConfig, \
7
- FindLatentRepresentationsConfig, CauchyCombinationConfig, RunAllModeConfig, ReportConfig
6
+ from gsMap.config import (
7
+ CauchyCombinationConfig,
8
+ FindLatentRepresentationsConfig,
9
+ GenerateLDScoreConfig,
10
+ LatentToGeneConfig,
11
+ ReportConfig,
12
+ RunAllModeConfig,
13
+ SpatialLDSCConfig,
14
+ )
8
15
  from gsMap.find_latent_representation import run_find_latent_representation
9
16
  from gsMap.generate_ldscore import run_generate_ldscore
10
17
  from gsMap.latent_to_gene import run_latent_to_gene
@@ -12,7 +19,6 @@ from gsMap.report import run_report
12
19
  from gsMap.spatial_ldsc_multiple_sumstats import run_spatial_ldsc
13
20
 
14
21
 
15
-
16
22
  def format_duration(seconds):
17
23
  hours = int(seconds // 3600)
18
24
  minutes = int((seconds % 3600) // 60)
@@ -21,73 +27,93 @@ def format_duration(seconds):
21
27
 
22
28
  def run_pipeline(config: RunAllModeConfig):
23
29
  # # Set up logging
24
- log_file = Path(config.workdir) / config.sample_name / 'gsMap_pipeline.log'
30
+ _current_datatime = time.strftime("%Y%m%d_%H%M%S")
31
+ log_file = (
32
+ Path(config.workdir)
33
+ / config.sample_name
34
+ / f"gsMap_pipeline_{config.sample_name}_{_current_datatime}.log"
35
+ )
25
36
  log_file.parent.mkdir(parents=True, exist_ok=True)
26
37
  logging.basicConfig(
27
38
  level=logging.INFO,
28
- format='[{asctime}] {levelname:.5s} | {name} - {message}',
39
+ format="[{asctime}] {levelname:.5s} | {name} - {message}",
29
40
  handlers=[
30
41
  logging.FileHandler(log_file),
31
42
  ],
32
- style='{'
43
+ style="{",
33
44
  )
34
45
 
35
- logger = logging.getLogger('gsMap.pipeline')
46
+ logger = logging.getLogger("gsMap.pipeline")
36
47
  logger.info("Starting pipeline with configuration: %s", config)
48
+ pipeline_start_time = time.time()
37
49
 
38
- find_latent_config = FindLatentRepresentationsConfig(
39
- workdir=config.workdir,
40
- input_hdf5_path=config.hdf5_path,
41
- sample_name=config.sample_name,
42
- annotation=config.annotation,
43
- data_layer=config.data_layer
44
- )
50
+ # Step 1: Find latent representations
51
+ if config.latent_representation is not None:
52
+ logger.warning(
53
+ f"Using the provided latent representation: {config.latent_representation} in {config.hdf5_path}. This would skip the Find_latent_representations step."
54
+ )
55
+ logger.info(
56
+ "Skipping step 1: Find latent representations, as latent representation is provided."
57
+ )
58
+ latent_to_gene_input_hdf5_path = config.hdf5_path
59
+ else:
60
+ latent_to_gene_input_hdf5_path = None
61
+ logger.info(
62
+ "No latent representation provided. Will run the Find_latent_representations step."
63
+ )
64
+ find_latent_config = FindLatentRepresentationsConfig(
65
+ workdir=config.workdir,
66
+ input_hdf5_path=config.hdf5_path,
67
+ sample_name=config.sample_name,
68
+ annotation=config.annotation,
69
+ data_layer=config.data_layer,
70
+ )
71
+
72
+ # Step 1: Find latent representations
73
+ start_time = time.time()
74
+
75
+ logger.info("Step 1: Finding latent representations")
76
+ if Path(find_latent_config.hdf5_with_latent_path).exists():
77
+ logger.info(
78
+ f"Find latent representations already done. Results saved at {find_latent_config.hdf5_with_latent_path}. Skipping..."
79
+ )
80
+ else:
81
+ run_find_latent_representation(find_latent_config)
82
+ end_time = time.time()
83
+ logger.info(f"Step 1 completed in {format_duration(end_time - start_time)}.")
45
84
 
46
85
  latent_to_gene_config = LatentToGeneConfig(
86
+ input_hdf5_path=latent_to_gene_input_hdf5_path,
47
87
  workdir=config.workdir,
48
88
  sample_name=config.sample_name,
49
89
  annotation=config.annotation,
50
- latent_representation='latent_GVAE',
51
- num_neighbour=51,
52
- num_neighbour_spatial=201,
53
- homolog_file=config.homolog_file
90
+ latent_representation=config.latent_representation,
91
+ num_neighbour=config.num_neighbour,
92
+ num_neighbour_spatial=config.num_neighbour_spatial,
93
+ homolog_file=config.homolog_file,
94
+ gM_slices=config.gM_slices,
54
95
  )
55
96
 
56
97
  ldscore_config = GenerateLDScoreConfig(
57
98
  workdir=config.workdir,
58
99
  sample_name=config.sample_name,
59
- chrom='all',
60
- # ldscore_save_dir=f"{config.workdir}/{config.sample_name}/generate_ldscore",
61
- # mkscore_feather_file=latent_to_gene_config.output_feather_path,
100
+ chrom="all",
62
101
  bfile_root=config.bfile_root,
63
102
  keep_snp_root=config.keep_snp_root,
64
103
  gtf_annotation_file=config.gtffile,
65
104
  spots_per_chunk=5_000,
66
105
  baseline_annotation_dir=config.baseline_annotation_dir,
67
106
  SNP_gene_pair_dir=config.SNP_gene_pair_dir,
68
- ldscore_save_format='quick_mode'
69
-
107
+ ldscore_save_format="quick_mode",
70
108
  )
71
109
 
72
- pipeline_start_time = time.time()
73
-
74
- # Step 1: Find latent representations
75
- start_time = time.time()
76
- logger.info("Step 1: Finding latent representations")
77
- if Path(find_latent_config.hdf5_with_latent_path).exists():
78
- logger.info(
79
- f"Find latent representations already done. Results saved at {find_latent_config.hdf5_with_latent_path}. Skipping...")
80
- else:
81
- run_find_latent_representation(find_latent_config)
82
- end_time = time.time()
83
- logger.info(f"Step 1 completed in {format_duration(end_time - start_time)}.")
84
-
85
110
  # Step 2: Latent to gene
86
111
  start_time = time.time()
87
112
  logger.info("Step 2: Mapping latent representations to genes")
88
113
  if Path(latent_to_gene_config.mkscore_feather_path).exists():
89
114
  logger.info(
90
- f"Latent to gene mapping already done. Results saved at {latent_to_gene_config.mkscore_feather_path}. Skipping...")
115
+ f"Latent to gene mapping already done. Results saved at {latent_to_gene_config.mkscore_feather_path}. Skipping..."
116
+ )
91
117
  else:
92
118
  run_latent_to_gene(latent_to_gene_config)
93
119
  end_time = time.time()
@@ -98,9 +124,13 @@ def run_pipeline(config: RunAllModeConfig):
98
124
  logger.info("Step 3: Generating LDScores")
99
125
 
100
126
  # check if LDscore has been generated by the done file
101
- ldsc_done_file = Path(ldscore_config.ldscore_save_dir) / f"{config.sample_name}_generate_ldscore.done"
127
+ ldsc_done_file = (
128
+ Path(ldscore_config.ldscore_save_dir) / f"{config.sample_name}_generate_ldscore.done"
129
+ )
102
130
  if ldsc_done_file.exists():
103
- logger.info(f"Basic LDScore generation already done. Results saved at {ldscore_config.ldscore_save_dir}. Skipping...")
131
+ logger.info(
132
+ f"Basic LDScore generation already done. Results saved at {ldscore_config.ldscore_save_dir}. Skipping..."
133
+ )
104
134
  else:
105
135
  run_generate_ldscore(ldscore_config)
106
136
  end_time = time.time()
@@ -116,11 +146,14 @@ def run_pipeline(config: RunAllModeConfig):
116
146
  for trait_name in sumstats_config:
117
147
  logger.info("Running spatial LDSC for trait: %s", trait_name)
118
148
  # detect if the spatial LDSC has been done:
119
- spatial_ldsc_result_file = Path(config.ldsc_save_dir) / f"{config.sample_name}_{trait_name}.csv.gz"
149
+ spatial_ldsc_result_file = (
150
+ Path(config.ldsc_save_dir) / f"{config.sample_name}_{trait_name}.csv.gz"
151
+ )
120
152
 
121
153
  if spatial_ldsc_result_file.exists():
122
154
  logger.info(
123
- f"Spatial LDSC already done for trait {trait_name}. Results saved at {spatial_ldsc_result_file}. Skipping...")
155
+ f"Spatial LDSC already done for trait {trait_name}. Results saved at {spatial_ldsc_result_file}. Skipping..."
156
+ )
124
157
  continue
125
158
 
126
159
  spatial_ldsc_config_trait = SpatialLDSCConfig(
@@ -132,7 +165,7 @@ def run_pipeline(config: RunAllModeConfig):
132
165
  # ldscore_save_dir=spatial_ldsc_config.ldscore_save_dir,
133
166
  # ldsc_save_dir=spatial_ldsc_config.ldsc_save_dir,
134
167
  num_processes=config.max_processes,
135
- ldscore_save_format='quick_mode',
168
+ ldscore_save_format="quick_mode",
136
169
  snp_gene_weight_adata_path=config.snp_gene_weight_adata_path,
137
170
  )
138
171
  run_spatial_ldsc(spatial_ldsc_config_trait)
@@ -142,13 +175,13 @@ def run_pipeline(config: RunAllModeConfig):
142
175
  # Step 5: Cauchy combination test
143
176
  start_time = time.time()
144
177
  logger.info("Step 6: Running Cauchy combination test")
145
- '/storage/yangjianLab/chenwenhao/projects/202312_GPS/test/20240817_vanilla_pipeline_mouse_embryo_v4/E16.5_E1S1.MOSTA/cauchy_combination/E16.5_E1S1.MOSTA_Depression_2023_NatureMed.Cauchy.csv.gz'
146
178
  for trait_name in sumstats_config:
147
179
  # check if the cauchy combination has been done
148
180
  cauchy_result_file = config.get_cauchy_result_file(trait_name)
149
181
  if cauchy_result_file.exists():
150
182
  logger.info(
151
- f"Cauchy combination already done for trait {trait_name}. Results saved at {cauchy_result_file}. Skipping...")
183
+ f"Cauchy combination already done for trait {trait_name}. Results saved at {cauchy_result_file}. Skipping..."
184
+ )
152
185
  continue
153
186
  cauchy_config = CauchyCombinationConfig(
154
187
  workdir=config.workdir,
@@ -168,11 +201,18 @@ def run_pipeline(config: RunAllModeConfig):
168
201
  sample_name=config.sample_name,
169
202
  annotation=config.annotation,
170
203
  trait_name=trait_name,
171
- plot_type='all',
204
+ plot_type="all",
172
205
  top_corr_genes=50,
173
206
  selected_genes=None,
174
207
  sumstats_file=sumstats_config[trait_name],
175
208
  )
209
+ gsMap_report_file = report_config.get_gsMap_report_file(trait_name)
210
+ if Path(gsMap_report_file).exists():
211
+ logger.info(
212
+ f"Final report already generated for trait {trait_name}. Results saved at {gsMap_report_file}. Skipping..."
213
+ )
214
+ continue
215
+
176
216
  # Create the run parameters dictionary for each trait
177
217
  run_parameter_dict = {
178
218
  "Sample Name": config.sample_name,
@@ -192,4 +232,4 @@ def run_pipeline(config: RunAllModeConfig):
192
232
  # Pass the run parameter dictionary to the report generation function
193
233
  run_report(report_config, run_parameters=run_parameter_dict)
194
234
 
195
- logger.info("Pipeline completed successfully.")
235
+ logger.info("Pipeline completed successfully.")
gsMap/setup.py CHANGED
@@ -2,4 +2,4 @@
2
2
  import setuptools
3
3
 
4
4
  if __name__ == "__main__":
5
- setuptools.setup(name='gsMap')
5
+ setuptools.setup(name="gsMap")