gsMap 1.71.2__py3-none-any.whl → 1.72.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gsMap/GNN/adjacency_matrix.py +25 -27
- gsMap/GNN/model.py +9 -7
- gsMap/GNN/train.py +8 -11
- gsMap/__init__.py +3 -3
- gsMap/__main__.py +3 -2
- gsMap/cauchy_combination_test.py +75 -72
- gsMap/config.py +822 -316
- gsMap/create_slice_mean.py +154 -0
- gsMap/diagnosis.py +179 -101
- gsMap/find_latent_representation.py +28 -26
- gsMap/format_sumstats.py +233 -201
- gsMap/generate_ldscore.py +353 -209
- gsMap/latent_to_gene.py +92 -60
- gsMap/main.py +23 -14
- gsMap/report.py +39 -25
- gsMap/run_all_mode.py +86 -46
- gsMap/setup.py +1 -1
- gsMap/spatial_ldsc_multiple_sumstats.py +154 -80
- gsMap/utils/generate_r2_matrix.py +173 -140
- gsMap/utils/jackknife.py +84 -80
- gsMap/utils/manhattan_plot.py +180 -207
- gsMap/utils/regression_read.py +105 -122
- gsMap/visualize.py +82 -64
- {gsmap-1.71.2.dist-info → gsmap-1.72.3.dist-info}/METADATA +21 -6
- gsmap-1.72.3.dist-info/RECORD +31 -0
- {gsmap-1.71.2.dist-info → gsmap-1.72.3.dist-info}/WHEEL +1 -1
- gsMap/utils/make_annotations.py +0 -518
- gsmap-1.71.2.dist-info/RECORD +0 -31
- {gsmap-1.71.2.dist-info → gsmap-1.72.3.dist-info}/LICENSE +0 -0
- {gsmap-1.71.2.dist-info → gsmap-1.72.3.dist-info}/entry_points.txt +0 -0
gsMap/run_all_mode.py
CHANGED
@@ -3,8 +3,15 @@ import time
|
|
3
3
|
from pathlib import Path
|
4
4
|
|
5
5
|
from gsMap.cauchy_combination_test import run_Cauchy_combination
|
6
|
-
from gsMap.config import
|
7
|
-
|
6
|
+
from gsMap.config import (
|
7
|
+
CauchyCombinationConfig,
|
8
|
+
FindLatentRepresentationsConfig,
|
9
|
+
GenerateLDScoreConfig,
|
10
|
+
LatentToGeneConfig,
|
11
|
+
ReportConfig,
|
12
|
+
RunAllModeConfig,
|
13
|
+
SpatialLDSCConfig,
|
14
|
+
)
|
8
15
|
from gsMap.find_latent_representation import run_find_latent_representation
|
9
16
|
from gsMap.generate_ldscore import run_generate_ldscore
|
10
17
|
from gsMap.latent_to_gene import run_latent_to_gene
|
@@ -12,7 +19,6 @@ from gsMap.report import run_report
|
|
12
19
|
from gsMap.spatial_ldsc_multiple_sumstats import run_spatial_ldsc
|
13
20
|
|
14
21
|
|
15
|
-
|
16
22
|
def format_duration(seconds):
|
17
23
|
hours = int(seconds // 3600)
|
18
24
|
minutes = int((seconds % 3600) // 60)
|
@@ -21,73 +27,93 @@ def format_duration(seconds):
|
|
21
27
|
|
22
28
|
def run_pipeline(config: RunAllModeConfig):
|
23
29
|
# # Set up logging
|
24
|
-
|
30
|
+
_current_datatime = time.strftime("%Y%m%d_%H%M%S")
|
31
|
+
log_file = (
|
32
|
+
Path(config.workdir)
|
33
|
+
/ config.sample_name
|
34
|
+
/ f"gsMap_pipeline_{config.sample_name}_{_current_datatime}.log"
|
35
|
+
)
|
25
36
|
log_file.parent.mkdir(parents=True, exist_ok=True)
|
26
37
|
logging.basicConfig(
|
27
38
|
level=logging.INFO,
|
28
|
-
format=
|
39
|
+
format="[{asctime}] {levelname:.5s} | {name} - {message}",
|
29
40
|
handlers=[
|
30
41
|
logging.FileHandler(log_file),
|
31
42
|
],
|
32
|
-
style=
|
43
|
+
style="{",
|
33
44
|
)
|
34
45
|
|
35
|
-
logger = logging.getLogger(
|
46
|
+
logger = logging.getLogger("gsMap.pipeline")
|
36
47
|
logger.info("Starting pipeline with configuration: %s", config)
|
48
|
+
pipeline_start_time = time.time()
|
37
49
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
50
|
+
# Step 1: Find latent representations
|
51
|
+
if config.latent_representation is not None:
|
52
|
+
logger.warning(
|
53
|
+
f"Using the provided latent representation: {config.latent_representation} in {config.hdf5_path}. This would skip the Find_latent_representations step."
|
54
|
+
)
|
55
|
+
logger.info(
|
56
|
+
"Skipping step 1: Find latent representations, as latent representation is provided."
|
57
|
+
)
|
58
|
+
latent_to_gene_input_hdf5_path = config.hdf5_path
|
59
|
+
else:
|
60
|
+
latent_to_gene_input_hdf5_path = None
|
61
|
+
logger.info(
|
62
|
+
"No latent representation provided. Will run the Find_latent_representations step."
|
63
|
+
)
|
64
|
+
find_latent_config = FindLatentRepresentationsConfig(
|
65
|
+
workdir=config.workdir,
|
66
|
+
input_hdf5_path=config.hdf5_path,
|
67
|
+
sample_name=config.sample_name,
|
68
|
+
annotation=config.annotation,
|
69
|
+
data_layer=config.data_layer,
|
70
|
+
)
|
71
|
+
|
72
|
+
# Step 1: Find latent representations
|
73
|
+
start_time = time.time()
|
74
|
+
|
75
|
+
logger.info("Step 1: Finding latent representations")
|
76
|
+
if Path(find_latent_config.hdf5_with_latent_path).exists():
|
77
|
+
logger.info(
|
78
|
+
f"Find latent representations already done. Results saved at {find_latent_config.hdf5_with_latent_path}. Skipping..."
|
79
|
+
)
|
80
|
+
else:
|
81
|
+
run_find_latent_representation(find_latent_config)
|
82
|
+
end_time = time.time()
|
83
|
+
logger.info(f"Step 1 completed in {format_duration(end_time - start_time)}.")
|
45
84
|
|
46
85
|
latent_to_gene_config = LatentToGeneConfig(
|
86
|
+
input_hdf5_path=latent_to_gene_input_hdf5_path,
|
47
87
|
workdir=config.workdir,
|
48
88
|
sample_name=config.sample_name,
|
49
89
|
annotation=config.annotation,
|
50
|
-
latent_representation=
|
51
|
-
num_neighbour=
|
52
|
-
num_neighbour_spatial=
|
53
|
-
homolog_file=config.homolog_file
|
90
|
+
latent_representation=config.latent_representation,
|
91
|
+
num_neighbour=config.num_neighbour,
|
92
|
+
num_neighbour_spatial=config.num_neighbour_spatial,
|
93
|
+
homolog_file=config.homolog_file,
|
94
|
+
gM_slices=config.gM_slices,
|
54
95
|
)
|
55
96
|
|
56
97
|
ldscore_config = GenerateLDScoreConfig(
|
57
98
|
workdir=config.workdir,
|
58
99
|
sample_name=config.sample_name,
|
59
|
-
chrom=
|
60
|
-
# ldscore_save_dir=f"{config.workdir}/{config.sample_name}/generate_ldscore",
|
61
|
-
# mkscore_feather_file=latent_to_gene_config.output_feather_path,
|
100
|
+
chrom="all",
|
62
101
|
bfile_root=config.bfile_root,
|
63
102
|
keep_snp_root=config.keep_snp_root,
|
64
103
|
gtf_annotation_file=config.gtffile,
|
65
104
|
spots_per_chunk=5_000,
|
66
105
|
baseline_annotation_dir=config.baseline_annotation_dir,
|
67
106
|
SNP_gene_pair_dir=config.SNP_gene_pair_dir,
|
68
|
-
ldscore_save_format=
|
69
|
-
|
107
|
+
ldscore_save_format="quick_mode",
|
70
108
|
)
|
71
109
|
|
72
|
-
pipeline_start_time = time.time()
|
73
|
-
|
74
|
-
# Step 1: Find latent representations
|
75
|
-
start_time = time.time()
|
76
|
-
logger.info("Step 1: Finding latent representations")
|
77
|
-
if Path(find_latent_config.hdf5_with_latent_path).exists():
|
78
|
-
logger.info(
|
79
|
-
f"Find latent representations already done. Results saved at {find_latent_config.hdf5_with_latent_path}. Skipping...")
|
80
|
-
else:
|
81
|
-
run_find_latent_representation(find_latent_config)
|
82
|
-
end_time = time.time()
|
83
|
-
logger.info(f"Step 1 completed in {format_duration(end_time - start_time)}.")
|
84
|
-
|
85
110
|
# Step 2: Latent to gene
|
86
111
|
start_time = time.time()
|
87
112
|
logger.info("Step 2: Mapping latent representations to genes")
|
88
113
|
if Path(latent_to_gene_config.mkscore_feather_path).exists():
|
89
114
|
logger.info(
|
90
|
-
f"Latent to gene mapping already done. Results saved at {latent_to_gene_config.mkscore_feather_path}. Skipping..."
|
115
|
+
f"Latent to gene mapping already done. Results saved at {latent_to_gene_config.mkscore_feather_path}. Skipping..."
|
116
|
+
)
|
91
117
|
else:
|
92
118
|
run_latent_to_gene(latent_to_gene_config)
|
93
119
|
end_time = time.time()
|
@@ -98,9 +124,13 @@ def run_pipeline(config: RunAllModeConfig):
|
|
98
124
|
logger.info("Step 3: Generating LDScores")
|
99
125
|
|
100
126
|
# check if LDscore has been generated by the done file
|
101
|
-
ldsc_done_file =
|
127
|
+
ldsc_done_file = (
|
128
|
+
Path(ldscore_config.ldscore_save_dir) / f"{config.sample_name}_generate_ldscore.done"
|
129
|
+
)
|
102
130
|
if ldsc_done_file.exists():
|
103
|
-
logger.info(
|
131
|
+
logger.info(
|
132
|
+
f"Basic LDScore generation already done. Results saved at {ldscore_config.ldscore_save_dir}. Skipping..."
|
133
|
+
)
|
104
134
|
else:
|
105
135
|
run_generate_ldscore(ldscore_config)
|
106
136
|
end_time = time.time()
|
@@ -116,11 +146,14 @@ def run_pipeline(config: RunAllModeConfig):
|
|
116
146
|
for trait_name in sumstats_config:
|
117
147
|
logger.info("Running spatial LDSC for trait: %s", trait_name)
|
118
148
|
# detect if the spatial LDSC has been done:
|
119
|
-
spatial_ldsc_result_file =
|
149
|
+
spatial_ldsc_result_file = (
|
150
|
+
Path(config.ldsc_save_dir) / f"{config.sample_name}_{trait_name}.csv.gz"
|
151
|
+
)
|
120
152
|
|
121
153
|
if spatial_ldsc_result_file.exists():
|
122
154
|
logger.info(
|
123
|
-
f"Spatial LDSC already done for trait {trait_name}. Results saved at {spatial_ldsc_result_file}. Skipping..."
|
155
|
+
f"Spatial LDSC already done for trait {trait_name}. Results saved at {spatial_ldsc_result_file}. Skipping..."
|
156
|
+
)
|
124
157
|
continue
|
125
158
|
|
126
159
|
spatial_ldsc_config_trait = SpatialLDSCConfig(
|
@@ -132,7 +165,7 @@ def run_pipeline(config: RunAllModeConfig):
|
|
132
165
|
# ldscore_save_dir=spatial_ldsc_config.ldscore_save_dir,
|
133
166
|
# ldsc_save_dir=spatial_ldsc_config.ldsc_save_dir,
|
134
167
|
num_processes=config.max_processes,
|
135
|
-
ldscore_save_format=
|
168
|
+
ldscore_save_format="quick_mode",
|
136
169
|
snp_gene_weight_adata_path=config.snp_gene_weight_adata_path,
|
137
170
|
)
|
138
171
|
run_spatial_ldsc(spatial_ldsc_config_trait)
|
@@ -142,13 +175,13 @@ def run_pipeline(config: RunAllModeConfig):
|
|
142
175
|
# Step 5: Cauchy combination test
|
143
176
|
start_time = time.time()
|
144
177
|
logger.info("Step 6: Running Cauchy combination test")
|
145
|
-
'/storage/yangjianLab/chenwenhao/projects/202312_GPS/test/20240817_vanilla_pipeline_mouse_embryo_v4/E16.5_E1S1.MOSTA/cauchy_combination/E16.5_E1S1.MOSTA_Depression_2023_NatureMed.Cauchy.csv.gz'
|
146
178
|
for trait_name in sumstats_config:
|
147
179
|
# check if the cauchy combination has been done
|
148
180
|
cauchy_result_file = config.get_cauchy_result_file(trait_name)
|
149
181
|
if cauchy_result_file.exists():
|
150
182
|
logger.info(
|
151
|
-
f"Cauchy combination already done for trait {trait_name}. Results saved at {cauchy_result_file}. Skipping..."
|
183
|
+
f"Cauchy combination already done for trait {trait_name}. Results saved at {cauchy_result_file}. Skipping..."
|
184
|
+
)
|
152
185
|
continue
|
153
186
|
cauchy_config = CauchyCombinationConfig(
|
154
187
|
workdir=config.workdir,
|
@@ -168,11 +201,18 @@ def run_pipeline(config: RunAllModeConfig):
|
|
168
201
|
sample_name=config.sample_name,
|
169
202
|
annotation=config.annotation,
|
170
203
|
trait_name=trait_name,
|
171
|
-
plot_type=
|
204
|
+
plot_type="all",
|
172
205
|
top_corr_genes=50,
|
173
206
|
selected_genes=None,
|
174
207
|
sumstats_file=sumstats_config[trait_name],
|
175
208
|
)
|
209
|
+
gsMap_report_file = report_config.get_gsMap_report_file(trait_name)
|
210
|
+
if Path(gsMap_report_file).exists():
|
211
|
+
logger.info(
|
212
|
+
f"Final report already generated for trait {trait_name}. Results saved at {gsMap_report_file}. Skipping..."
|
213
|
+
)
|
214
|
+
continue
|
215
|
+
|
176
216
|
# Create the run parameters dictionary for each trait
|
177
217
|
run_parameter_dict = {
|
178
218
|
"Sample Name": config.sample_name,
|
@@ -192,4 +232,4 @@ def run_pipeline(config: RunAllModeConfig):
|
|
192
232
|
# Pass the run parameter dictionary to the report generation function
|
193
233
|
run_report(report_config, run_parameters=run_parameter_dict)
|
194
234
|
|
195
|
-
logger.info("Pipeline completed successfully.")
|
235
|
+
logger.info("Pipeline completed successfully.")
|
gsMap/setup.py
CHANGED