gsMap 1.71.2__py3-none-any.whl → 1.73.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gsMap/GNN/adjacency_matrix.py +25 -27
- gsMap/GNN/model.py +9 -7
- gsMap/GNN/train.py +8 -11
- gsMap/__init__.py +3 -3
- gsMap/__main__.py +3 -2
- gsMap/cauchy_combination_test.py +78 -75
- gsMap/config.py +948 -322
- gsMap/create_slice_mean.py +168 -0
- gsMap/diagnosis.py +179 -101
- gsMap/find_latent_representation.py +29 -27
- gsMap/format_sumstats.py +239 -201
- gsMap/generate_ldscore.py +334 -222
- gsMap/latent_to_gene.py +128 -68
- gsMap/main.py +23 -14
- gsMap/report.py +39 -25
- gsMap/run_all_mode.py +87 -46
- gsMap/setup.py +1 -1
- gsMap/spatial_ldsc_multiple_sumstats.py +154 -80
- gsMap/utils/generate_r2_matrix.py +100 -346
- gsMap/utils/jackknife.py +84 -80
- gsMap/utils/manhattan_plot.py +180 -207
- gsMap/utils/regression_read.py +83 -176
- gsMap/visualize.py +82 -64
- gsmap-1.73.0.dist-info/METADATA +169 -0
- gsmap-1.73.0.dist-info/RECORD +31 -0
- {gsmap-1.71.2.dist-info → gsmap-1.73.0.dist-info}/WHEEL +1 -1
- {gsmap-1.71.2.dist-info → gsmap-1.73.0.dist-info/licenses}/LICENSE +6 -6
- gsMap/utils/make_annotations.py +0 -518
- gsmap-1.71.2.dist-info/METADATA +0 -105
- gsmap-1.71.2.dist-info/RECORD +0 -31
- {gsmap-1.71.2.dist-info → gsmap-1.73.0.dist-info}/entry_points.txt +0 -0
gsMap/run_all_mode.py
CHANGED
@@ -3,8 +3,15 @@ import time
|
|
3
3
|
from pathlib import Path
|
4
4
|
|
5
5
|
from gsMap.cauchy_combination_test import run_Cauchy_combination
|
6
|
-
from gsMap.config import
|
7
|
-
|
6
|
+
from gsMap.config import (
|
7
|
+
CauchyCombinationConfig,
|
8
|
+
FindLatentRepresentationsConfig,
|
9
|
+
GenerateLDScoreConfig,
|
10
|
+
LatentToGeneConfig,
|
11
|
+
ReportConfig,
|
12
|
+
RunAllModeConfig,
|
13
|
+
SpatialLDSCConfig,
|
14
|
+
)
|
8
15
|
from gsMap.find_latent_representation import run_find_latent_representation
|
9
16
|
from gsMap.generate_ldscore import run_generate_ldscore
|
10
17
|
from gsMap.latent_to_gene import run_latent_to_gene
|
@@ -12,7 +19,6 @@ from gsMap.report import run_report
|
|
12
19
|
from gsMap.spatial_ldsc_multiple_sumstats import run_spatial_ldsc
|
13
20
|
|
14
21
|
|
15
|
-
|
16
22
|
def format_duration(seconds):
|
17
23
|
hours = int(seconds // 3600)
|
18
24
|
minutes = int((seconds % 3600) // 60)
|
@@ -21,73 +27,94 @@ def format_duration(seconds):
|
|
21
27
|
|
22
28
|
def run_pipeline(config: RunAllModeConfig):
|
23
29
|
# # Set up logging
|
24
|
-
|
30
|
+
_current_datatime = time.strftime("%Y%m%d_%H%M%S")
|
31
|
+
log_file = (
|
32
|
+
Path(config.workdir)
|
33
|
+
/ config.sample_name
|
34
|
+
/ f"gsMap_pipeline_{config.sample_name}_{_current_datatime}.log"
|
35
|
+
)
|
25
36
|
log_file.parent.mkdir(parents=True, exist_ok=True)
|
26
37
|
logging.basicConfig(
|
27
38
|
level=logging.INFO,
|
28
|
-
format=
|
39
|
+
format="[{asctime}] {levelname:.5s} | {name} - {message}",
|
29
40
|
handlers=[
|
30
41
|
logging.FileHandler(log_file),
|
31
42
|
],
|
32
|
-
style=
|
43
|
+
style="{",
|
33
44
|
)
|
34
45
|
|
35
|
-
logger = logging.getLogger(
|
46
|
+
logger = logging.getLogger("gsMap.pipeline")
|
36
47
|
logger.info("Starting pipeline with configuration: %s", config)
|
48
|
+
pipeline_start_time = time.time()
|
37
49
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
50
|
+
# Step 1: Find latent representations
|
51
|
+
if config.latent_representation is not None:
|
52
|
+
logger.warning(
|
53
|
+
f"Using the provided latent representation: {config.latent_representation} in {config.hdf5_path}. This would skip the Find_latent_representations step."
|
54
|
+
)
|
55
|
+
logger.info(
|
56
|
+
"Skipping step 1: Find latent representations, as latent representation is provided."
|
57
|
+
)
|
58
|
+
latent_to_gene_input_hdf5_path = config.hdf5_path
|
59
|
+
else:
|
60
|
+
latent_to_gene_input_hdf5_path = None
|
61
|
+
logger.info(
|
62
|
+
"No latent representation provided. Will run the Find_latent_representations step."
|
63
|
+
)
|
64
|
+
find_latent_config = FindLatentRepresentationsConfig(
|
65
|
+
workdir=config.workdir,
|
66
|
+
input_hdf5_path=config.hdf5_path,
|
67
|
+
sample_name=config.sample_name,
|
68
|
+
annotation=config.annotation,
|
69
|
+
data_layer=config.data_layer,
|
70
|
+
n_comps=config.n_comps,
|
71
|
+
)
|
72
|
+
|
73
|
+
# Step 1: Find latent representations
|
74
|
+
start_time = time.time()
|
75
|
+
|
76
|
+
logger.info("Step 1: Finding latent representations")
|
77
|
+
if Path(find_latent_config.hdf5_with_latent_path).exists():
|
78
|
+
logger.info(
|
79
|
+
f"Find latent representations already done. Results saved at {find_latent_config.hdf5_with_latent_path}. Skipping..."
|
80
|
+
)
|
81
|
+
else:
|
82
|
+
run_find_latent_representation(find_latent_config)
|
83
|
+
end_time = time.time()
|
84
|
+
logger.info(f"Step 1 completed in {format_duration(end_time - start_time)}.")
|
45
85
|
|
46
86
|
latent_to_gene_config = LatentToGeneConfig(
|
87
|
+
input_hdf5_path=latent_to_gene_input_hdf5_path,
|
47
88
|
workdir=config.workdir,
|
48
89
|
sample_name=config.sample_name,
|
49
90
|
annotation=config.annotation,
|
50
|
-
latent_representation=
|
51
|
-
num_neighbour=
|
52
|
-
num_neighbour_spatial=
|
53
|
-
homolog_file=config.homolog_file
|
91
|
+
latent_representation=config.latent_representation,
|
92
|
+
num_neighbour=config.num_neighbour,
|
93
|
+
num_neighbour_spatial=config.num_neighbour_spatial,
|
94
|
+
homolog_file=config.homolog_file,
|
95
|
+
gM_slices=config.gM_slices,
|
54
96
|
)
|
55
97
|
|
56
98
|
ldscore_config = GenerateLDScoreConfig(
|
57
99
|
workdir=config.workdir,
|
58
100
|
sample_name=config.sample_name,
|
59
|
-
chrom=
|
60
|
-
# ldscore_save_dir=f"{config.workdir}/{config.sample_name}/generate_ldscore",
|
61
|
-
# mkscore_feather_file=latent_to_gene_config.output_feather_path,
|
101
|
+
chrom="all",
|
62
102
|
bfile_root=config.bfile_root,
|
63
103
|
keep_snp_root=config.keep_snp_root,
|
64
104
|
gtf_annotation_file=config.gtffile,
|
65
105
|
spots_per_chunk=5_000,
|
66
106
|
baseline_annotation_dir=config.baseline_annotation_dir,
|
67
107
|
SNP_gene_pair_dir=config.SNP_gene_pair_dir,
|
68
|
-
ldscore_save_format=
|
69
|
-
|
108
|
+
ldscore_save_format="quick_mode",
|
70
109
|
)
|
71
110
|
|
72
|
-
pipeline_start_time = time.time()
|
73
|
-
|
74
|
-
# Step 1: Find latent representations
|
75
|
-
start_time = time.time()
|
76
|
-
logger.info("Step 1: Finding latent representations")
|
77
|
-
if Path(find_latent_config.hdf5_with_latent_path).exists():
|
78
|
-
logger.info(
|
79
|
-
f"Find latent representations already done. Results saved at {find_latent_config.hdf5_with_latent_path}. Skipping...")
|
80
|
-
else:
|
81
|
-
run_find_latent_representation(find_latent_config)
|
82
|
-
end_time = time.time()
|
83
|
-
logger.info(f"Step 1 completed in {format_duration(end_time - start_time)}.")
|
84
|
-
|
85
111
|
# Step 2: Latent to gene
|
86
112
|
start_time = time.time()
|
87
113
|
logger.info("Step 2: Mapping latent representations to genes")
|
88
114
|
if Path(latent_to_gene_config.mkscore_feather_path).exists():
|
89
115
|
logger.info(
|
90
|
-
f"Latent to gene mapping already done. Results saved at {latent_to_gene_config.mkscore_feather_path}. Skipping..."
|
116
|
+
f"Latent to gene mapping already done. Results saved at {latent_to_gene_config.mkscore_feather_path}. Skipping..."
|
117
|
+
)
|
91
118
|
else:
|
92
119
|
run_latent_to_gene(latent_to_gene_config)
|
93
120
|
end_time = time.time()
|
@@ -98,9 +125,13 @@ def run_pipeline(config: RunAllModeConfig):
|
|
98
125
|
logger.info("Step 3: Generating LDScores")
|
99
126
|
|
100
127
|
# check if LDscore has been generated by the done file
|
101
|
-
ldsc_done_file =
|
128
|
+
ldsc_done_file = (
|
129
|
+
Path(ldscore_config.ldscore_save_dir) / f"{config.sample_name}_generate_ldscore.done"
|
130
|
+
)
|
102
131
|
if ldsc_done_file.exists():
|
103
|
-
logger.info(
|
132
|
+
logger.info(
|
133
|
+
f"Basic LDScore generation already done. Results saved at {ldscore_config.ldscore_save_dir}. Skipping..."
|
134
|
+
)
|
104
135
|
else:
|
105
136
|
run_generate_ldscore(ldscore_config)
|
106
137
|
end_time = time.time()
|
@@ -116,11 +147,14 @@ def run_pipeline(config: RunAllModeConfig):
|
|
116
147
|
for trait_name in sumstats_config:
|
117
148
|
logger.info("Running spatial LDSC for trait: %s", trait_name)
|
118
149
|
# detect if the spatial LDSC has been done:
|
119
|
-
spatial_ldsc_result_file =
|
150
|
+
spatial_ldsc_result_file = (
|
151
|
+
Path(config.ldsc_save_dir) / f"{config.sample_name}_{trait_name}.csv.gz"
|
152
|
+
)
|
120
153
|
|
121
154
|
if spatial_ldsc_result_file.exists():
|
122
155
|
logger.info(
|
123
|
-
f"Spatial LDSC already done for trait {trait_name}. Results saved at {spatial_ldsc_result_file}. Skipping..."
|
156
|
+
f"Spatial LDSC already done for trait {trait_name}. Results saved at {spatial_ldsc_result_file}. Skipping..."
|
157
|
+
)
|
124
158
|
continue
|
125
159
|
|
126
160
|
spatial_ldsc_config_trait = SpatialLDSCConfig(
|
@@ -132,7 +166,7 @@ def run_pipeline(config: RunAllModeConfig):
|
|
132
166
|
# ldscore_save_dir=spatial_ldsc_config.ldscore_save_dir,
|
133
167
|
# ldsc_save_dir=spatial_ldsc_config.ldsc_save_dir,
|
134
168
|
num_processes=config.max_processes,
|
135
|
-
ldscore_save_format=
|
169
|
+
ldscore_save_format="quick_mode",
|
136
170
|
snp_gene_weight_adata_path=config.snp_gene_weight_adata_path,
|
137
171
|
)
|
138
172
|
run_spatial_ldsc(spatial_ldsc_config_trait)
|
@@ -142,13 +176,13 @@ def run_pipeline(config: RunAllModeConfig):
|
|
142
176
|
# Step 5: Cauchy combination test
|
143
177
|
start_time = time.time()
|
144
178
|
logger.info("Step 6: Running Cauchy combination test")
|
145
|
-
'/storage/yangjianLab/chenwenhao/projects/202312_GPS/test/20240817_vanilla_pipeline_mouse_embryo_v4/E16.5_E1S1.MOSTA/cauchy_combination/E16.5_E1S1.MOSTA_Depression_2023_NatureMed.Cauchy.csv.gz'
|
146
179
|
for trait_name in sumstats_config:
|
147
180
|
# check if the cauchy combination has been done
|
148
181
|
cauchy_result_file = config.get_cauchy_result_file(trait_name)
|
149
182
|
if cauchy_result_file.exists():
|
150
183
|
logger.info(
|
151
|
-
f"Cauchy combination already done for trait {trait_name}. Results saved at {cauchy_result_file}. Skipping..."
|
184
|
+
f"Cauchy combination already done for trait {trait_name}. Results saved at {cauchy_result_file}. Skipping..."
|
185
|
+
)
|
152
186
|
continue
|
153
187
|
cauchy_config = CauchyCombinationConfig(
|
154
188
|
workdir=config.workdir,
|
@@ -168,11 +202,18 @@ def run_pipeline(config: RunAllModeConfig):
|
|
168
202
|
sample_name=config.sample_name,
|
169
203
|
annotation=config.annotation,
|
170
204
|
trait_name=trait_name,
|
171
|
-
plot_type=
|
205
|
+
plot_type="all",
|
172
206
|
top_corr_genes=50,
|
173
207
|
selected_genes=None,
|
174
208
|
sumstats_file=sumstats_config[trait_name],
|
175
209
|
)
|
210
|
+
gsMap_report_file = report_config.get_gsMap_report_file(trait_name)
|
211
|
+
if Path(gsMap_report_file).exists():
|
212
|
+
logger.info(
|
213
|
+
f"Final report already generated for trait {trait_name}. Results saved at {gsMap_report_file}. Skipping..."
|
214
|
+
)
|
215
|
+
continue
|
216
|
+
|
176
217
|
# Create the run parameters dictionary for each trait
|
177
218
|
run_parameter_dict = {
|
178
219
|
"Sample Name": config.sample_name,
|
@@ -192,4 +233,4 @@ def run_pipeline(config: RunAllModeConfig):
|
|
192
233
|
# Pass the run parameter dictionary to the report generation function
|
193
234
|
run_report(report_config, run_parameters=run_parameter_dict)
|
194
235
|
|
195
|
-
logger.info("Pipeline completed successfully.")
|
236
|
+
logger.info("Pipeline completed successfully.")
|
gsMap/setup.py
CHANGED