gsMap 1.71.2__py3-none-any.whl → 1.73.0__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
gsMap/run_all_mode.py CHANGED
@@ -3,8 +3,15 @@ import time
3
3
  from pathlib import Path
4
4
 
5
5
  from gsMap.cauchy_combination_test import run_Cauchy_combination
6
- from gsMap.config import GenerateLDScoreConfig, SpatialLDSCConfig, LatentToGeneConfig, \
7
- FindLatentRepresentationsConfig, CauchyCombinationConfig, RunAllModeConfig, ReportConfig
6
+ from gsMap.config import (
7
+ CauchyCombinationConfig,
8
+ FindLatentRepresentationsConfig,
9
+ GenerateLDScoreConfig,
10
+ LatentToGeneConfig,
11
+ ReportConfig,
12
+ RunAllModeConfig,
13
+ SpatialLDSCConfig,
14
+ )
8
15
  from gsMap.find_latent_representation import run_find_latent_representation
9
16
  from gsMap.generate_ldscore import run_generate_ldscore
10
17
  from gsMap.latent_to_gene import run_latent_to_gene
@@ -12,7 +19,6 @@ from gsMap.report import run_report
12
19
  from gsMap.spatial_ldsc_multiple_sumstats import run_spatial_ldsc
13
20
 
14
21
 
15
-
16
22
  def format_duration(seconds):
17
23
  hours = int(seconds // 3600)
18
24
  minutes = int((seconds % 3600) // 60)
@@ -21,73 +27,94 @@ def format_duration(seconds):
21
27
 
22
28
  def run_pipeline(config: RunAllModeConfig):
23
29
  # # Set up logging
24
- log_file = Path(config.workdir) / config.sample_name / 'gsMap_pipeline.log'
30
+ _current_datatime = time.strftime("%Y%m%d_%H%M%S")
31
+ log_file = (
32
+ Path(config.workdir)
33
+ / config.sample_name
34
+ / f"gsMap_pipeline_{config.sample_name}_{_current_datatime}.log"
35
+ )
25
36
  log_file.parent.mkdir(parents=True, exist_ok=True)
26
37
  logging.basicConfig(
27
38
  level=logging.INFO,
28
- format='[{asctime}] {levelname:.5s} | {name} - {message}',
39
+ format="[{asctime}] {levelname:.5s} | {name} - {message}",
29
40
  handlers=[
30
41
  logging.FileHandler(log_file),
31
42
  ],
32
- style='{'
43
+ style="{",
33
44
  )
34
45
 
35
- logger = logging.getLogger('gsMap.pipeline')
46
+ logger = logging.getLogger("gsMap.pipeline")
36
47
  logger.info("Starting pipeline with configuration: %s", config)
48
+ pipeline_start_time = time.time()
37
49
 
38
- find_latent_config = FindLatentRepresentationsConfig(
39
- workdir=config.workdir,
40
- input_hdf5_path=config.hdf5_path,
41
- sample_name=config.sample_name,
42
- annotation=config.annotation,
43
- data_layer=config.data_layer
44
- )
50
+ # Step 1: Find latent representations
51
+ if config.latent_representation is not None:
52
+ logger.warning(
53
+ f"Using the provided latent representation: {config.latent_representation} in {config.hdf5_path}. This would skip the Find_latent_representations step."
54
+ )
55
+ logger.info(
56
+ "Skipping step 1: Find latent representations, as latent representation is provided."
57
+ )
58
+ latent_to_gene_input_hdf5_path = config.hdf5_path
59
+ else:
60
+ latent_to_gene_input_hdf5_path = None
61
+ logger.info(
62
+ "No latent representation provided. Will run the Find_latent_representations step."
63
+ )
64
+ find_latent_config = FindLatentRepresentationsConfig(
65
+ workdir=config.workdir,
66
+ input_hdf5_path=config.hdf5_path,
67
+ sample_name=config.sample_name,
68
+ annotation=config.annotation,
69
+ data_layer=config.data_layer,
70
+ n_comps=config.n_comps,
71
+ )
72
+
73
+ # Step 1: Find latent representations
74
+ start_time = time.time()
75
+
76
+ logger.info("Step 1: Finding latent representations")
77
+ if Path(find_latent_config.hdf5_with_latent_path).exists():
78
+ logger.info(
79
+ f"Find latent representations already done. Results saved at {find_latent_config.hdf5_with_latent_path}. Skipping..."
80
+ )
81
+ else:
82
+ run_find_latent_representation(find_latent_config)
83
+ end_time = time.time()
84
+ logger.info(f"Step 1 completed in {format_duration(end_time - start_time)}.")
45
85
 
46
86
  latent_to_gene_config = LatentToGeneConfig(
87
+ input_hdf5_path=latent_to_gene_input_hdf5_path,
47
88
  workdir=config.workdir,
48
89
  sample_name=config.sample_name,
49
90
  annotation=config.annotation,
50
- latent_representation='latent_GVAE',
51
- num_neighbour=51,
52
- num_neighbour_spatial=201,
53
- homolog_file=config.homolog_file
91
+ latent_representation=config.latent_representation,
92
+ num_neighbour=config.num_neighbour,
93
+ num_neighbour_spatial=config.num_neighbour_spatial,
94
+ homolog_file=config.homolog_file,
95
+ gM_slices=config.gM_slices,
54
96
  )
55
97
 
56
98
  ldscore_config = GenerateLDScoreConfig(
57
99
  workdir=config.workdir,
58
100
  sample_name=config.sample_name,
59
- chrom='all',
60
- # ldscore_save_dir=f"{config.workdir}/{config.sample_name}/generate_ldscore",
61
- # mkscore_feather_file=latent_to_gene_config.output_feather_path,
101
+ chrom="all",
62
102
  bfile_root=config.bfile_root,
63
103
  keep_snp_root=config.keep_snp_root,
64
104
  gtf_annotation_file=config.gtffile,
65
105
  spots_per_chunk=5_000,
66
106
  baseline_annotation_dir=config.baseline_annotation_dir,
67
107
  SNP_gene_pair_dir=config.SNP_gene_pair_dir,
68
- ldscore_save_format='quick_mode'
69
-
108
+ ldscore_save_format="quick_mode",
70
109
  )
71
110
 
72
- pipeline_start_time = time.time()
73
-
74
- # Step 1: Find latent representations
75
- start_time = time.time()
76
- logger.info("Step 1: Finding latent representations")
77
- if Path(find_latent_config.hdf5_with_latent_path).exists():
78
- logger.info(
79
- f"Find latent representations already done. Results saved at {find_latent_config.hdf5_with_latent_path}. Skipping...")
80
- else:
81
- run_find_latent_representation(find_latent_config)
82
- end_time = time.time()
83
- logger.info(f"Step 1 completed in {format_duration(end_time - start_time)}.")
84
-
85
111
  # Step 2: Latent to gene
86
112
  start_time = time.time()
87
113
  logger.info("Step 2: Mapping latent representations to genes")
88
114
  if Path(latent_to_gene_config.mkscore_feather_path).exists():
89
115
  logger.info(
90
- f"Latent to gene mapping already done. Results saved at {latent_to_gene_config.mkscore_feather_path}. Skipping...")
116
+ f"Latent to gene mapping already done. Results saved at {latent_to_gene_config.mkscore_feather_path}. Skipping..."
117
+ )
91
118
  else:
92
119
  run_latent_to_gene(latent_to_gene_config)
93
120
  end_time = time.time()
@@ -98,9 +125,13 @@ def run_pipeline(config: RunAllModeConfig):
98
125
  logger.info("Step 3: Generating LDScores")
99
126
 
100
127
  # check if LDscore has been generated by the done file
101
- ldsc_done_file = Path(ldscore_config.ldscore_save_dir) / f"{config.sample_name}_generate_ldscore.done"
128
+ ldsc_done_file = (
129
+ Path(ldscore_config.ldscore_save_dir) / f"{config.sample_name}_generate_ldscore.done"
130
+ )
102
131
  if ldsc_done_file.exists():
103
- logger.info(f"Basic LDScore generation already done. Results saved at {ldscore_config.ldscore_save_dir}. Skipping...")
132
+ logger.info(
133
+ f"Basic LDScore generation already done. Results saved at {ldscore_config.ldscore_save_dir}. Skipping..."
134
+ )
104
135
  else:
105
136
  run_generate_ldscore(ldscore_config)
106
137
  end_time = time.time()
@@ -116,11 +147,14 @@ def run_pipeline(config: RunAllModeConfig):
116
147
  for trait_name in sumstats_config:
117
148
  logger.info("Running spatial LDSC for trait: %s", trait_name)
118
149
  # detect if the spatial LDSC has been done:
119
- spatial_ldsc_result_file = Path(config.ldsc_save_dir) / f"{config.sample_name}_{trait_name}.csv.gz"
150
+ spatial_ldsc_result_file = (
151
+ Path(config.ldsc_save_dir) / f"{config.sample_name}_{trait_name}.csv.gz"
152
+ )
120
153
 
121
154
  if spatial_ldsc_result_file.exists():
122
155
  logger.info(
123
- f"Spatial LDSC already done for trait {trait_name}. Results saved at {spatial_ldsc_result_file}. Skipping...")
156
+ f"Spatial LDSC already done for trait {trait_name}. Results saved at {spatial_ldsc_result_file}. Skipping..."
157
+ )
124
158
  continue
125
159
 
126
160
  spatial_ldsc_config_trait = SpatialLDSCConfig(
@@ -132,7 +166,7 @@ def run_pipeline(config: RunAllModeConfig):
132
166
  # ldscore_save_dir=spatial_ldsc_config.ldscore_save_dir,
133
167
  # ldsc_save_dir=spatial_ldsc_config.ldsc_save_dir,
134
168
  num_processes=config.max_processes,
135
- ldscore_save_format='quick_mode',
169
+ ldscore_save_format="quick_mode",
136
170
  snp_gene_weight_adata_path=config.snp_gene_weight_adata_path,
137
171
  )
138
172
  run_spatial_ldsc(spatial_ldsc_config_trait)
@@ -142,13 +176,13 @@ def run_pipeline(config: RunAllModeConfig):
142
176
  # Step 5: Cauchy combination test
143
177
  start_time = time.time()
144
178
  logger.info("Step 6: Running Cauchy combination test")
145
- '/storage/yangjianLab/chenwenhao/projects/202312_GPS/test/20240817_vanilla_pipeline_mouse_embryo_v4/E16.5_E1S1.MOSTA/cauchy_combination/E16.5_E1S1.MOSTA_Depression_2023_NatureMed.Cauchy.csv.gz'
146
179
  for trait_name in sumstats_config:
147
180
  # check if the cauchy combination has been done
148
181
  cauchy_result_file = config.get_cauchy_result_file(trait_name)
149
182
  if cauchy_result_file.exists():
150
183
  logger.info(
151
- f"Cauchy combination already done for trait {trait_name}. Results saved at {cauchy_result_file}. Skipping...")
184
+ f"Cauchy combination already done for trait {trait_name}. Results saved at {cauchy_result_file}. Skipping..."
185
+ )
152
186
  continue
153
187
  cauchy_config = CauchyCombinationConfig(
154
188
  workdir=config.workdir,
@@ -168,11 +202,18 @@ def run_pipeline(config: RunAllModeConfig):
168
202
  sample_name=config.sample_name,
169
203
  annotation=config.annotation,
170
204
  trait_name=trait_name,
171
- plot_type='all',
205
+ plot_type="all",
172
206
  top_corr_genes=50,
173
207
  selected_genes=None,
174
208
  sumstats_file=sumstats_config[trait_name],
175
209
  )
210
+ gsMap_report_file = report_config.get_gsMap_report_file(trait_name)
211
+ if Path(gsMap_report_file).exists():
212
+ logger.info(
213
+ f"Final report already generated for trait {trait_name}. Results saved at {gsMap_report_file}. Skipping..."
214
+ )
215
+ continue
216
+
176
217
  # Create the run parameters dictionary for each trait
177
218
  run_parameter_dict = {
178
219
  "Sample Name": config.sample_name,
@@ -192,4 +233,4 @@ def run_pipeline(config: RunAllModeConfig):
192
233
  # Pass the run parameter dictionary to the report generation function
193
234
  run_report(report_config, run_parameters=run_parameter_dict)
194
235
 
195
- logger.info("Pipeline completed successfully.")
236
+ logger.info("Pipeline completed successfully.")
gsMap/setup.py CHANGED
@@ -2,4 +2,4 @@
2
2
  import setuptools
3
3
 
4
4
  if __name__ == "__main__":
5
- setuptools.setup(name='gsMap')
5
+ setuptools.setup(name="gsMap")