gsMap 1.70__py3-none-any.whl → 1.71.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gsMap/GNN/__init__.py +0 -0
- gsMap/GNN/adjacency_matrix.py +75 -75
- gsMap/GNN/model.py +90 -89
- gsMap/GNN/train.py +0 -0
- gsMap/__init__.py +5 -5
- gsMap/__main__.py +2 -2
- gsMap/cauchy_combination_test.py +141 -141
- gsMap/config.py +805 -805
- gsMap/diagnosis.py +273 -273
- gsMap/find_latent_representation.py +133 -133
- gsMap/format_sumstats.py +407 -407
- gsMap/generate_ldscore.py +618 -618
- gsMap/latent_to_gene.py +234 -234
- gsMap/main.py +31 -31
- gsMap/report.py +160 -160
- gsMap/run_all_mode.py +194 -194
- gsMap/setup.py +0 -0
- gsMap/spatial_ldsc_multiple_sumstats.py +380 -380
- gsMap/templates/report_template.html +198 -198
- gsMap/utils/__init__.py +0 -0
- gsMap/utils/generate_r2_matrix.py +735 -735
- gsMap/utils/jackknife.py +514 -514
- gsMap/utils/make_annotations.py +518 -518
- gsMap/utils/manhattan_plot.py +639 -639
- gsMap/utils/regression_read.py +294 -294
- gsMap/visualize.py +198 -198
- {gsmap-1.70.dist-info → gsmap-1.71.1.dist-info}/LICENSE +21 -21
- {gsmap-1.70.dist-info → gsmap-1.71.1.dist-info}/METADATA +2 -2
- gsmap-1.71.1.dist-info/RECORD +31 -0
- gsmap-1.70.dist-info/RECORD +0 -31
- {gsmap-1.70.dist-info → gsmap-1.71.1.dist-info}/WHEEL +0 -0
- {gsmap-1.70.dist-info → gsmap-1.71.1.dist-info}/entry_points.txt +0 -0
gsMap/run_all_mode.py
CHANGED
@@ -1,195 +1,195 @@
|
|
1
|
-
import logging
|
2
|
-
import time
|
3
|
-
from pathlib import Path
|
4
|
-
|
5
|
-
from gsMap.cauchy_combination_test import run_Cauchy_combination
|
6
|
-
from gsMap.config import GenerateLDScoreConfig, SpatialLDSCConfig, LatentToGeneConfig, \
|
7
|
-
FindLatentRepresentationsConfig, CauchyCombinationConfig, RunAllModeConfig, ReportConfig
|
8
|
-
from gsMap.find_latent_representation import run_find_latent_representation
|
9
|
-
from gsMap.generate_ldscore import run_generate_ldscore
|
10
|
-
from gsMap.latent_to_gene import run_latent_to_gene
|
11
|
-
from gsMap.report import run_report
|
12
|
-
from gsMap.spatial_ldsc_multiple_sumstats import run_spatial_ldsc
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
def format_duration(seconds):
|
17
|
-
hours = int(seconds // 3600)
|
18
|
-
minutes = int((seconds % 3600) // 60)
|
19
|
-
return f"{hours}h {minutes}m"
|
20
|
-
|
21
|
-
|
22
|
-
def run_pipeline(config: RunAllModeConfig):
|
23
|
-
# # Set up logging
|
24
|
-
log_file = Path(config.workdir) / config.sample_name / 'gsMap_pipeline.log'
|
25
|
-
log_file.parent.mkdir(parents=True, exist_ok=True)
|
26
|
-
logging.basicConfig(
|
27
|
-
level=logging.INFO,
|
28
|
-
format='[{asctime}] {levelname:.5s} | {name} - {message}',
|
29
|
-
handlers=[
|
30
|
-
logging.FileHandler(log_file),
|
31
|
-
],
|
32
|
-
style='{'
|
33
|
-
)
|
34
|
-
|
35
|
-
logger = logging.getLogger('gsMap.pipeline')
|
36
|
-
logger.info("Starting pipeline with configuration: %s", config)
|
37
|
-
|
38
|
-
find_latent_config = FindLatentRepresentationsConfig(
|
39
|
-
workdir=config.workdir,
|
40
|
-
input_hdf5_path=config.hdf5_path,
|
41
|
-
sample_name=config.sample_name,
|
42
|
-
annotation=config.annotation,
|
43
|
-
data_layer=config.data_layer
|
44
|
-
)
|
45
|
-
|
46
|
-
latent_to_gene_config = LatentToGeneConfig(
|
47
|
-
workdir=config.workdir,
|
48
|
-
sample_name=config.sample_name,
|
49
|
-
annotation=config.annotation,
|
50
|
-
latent_representation='latent_GVAE',
|
51
|
-
num_neighbour=51,
|
52
|
-
num_neighbour_spatial=201,
|
53
|
-
homolog_file=config.homolog_file
|
54
|
-
)
|
55
|
-
|
56
|
-
ldscore_config = GenerateLDScoreConfig(
|
57
|
-
workdir=config.workdir,
|
58
|
-
sample_name=config.sample_name,
|
59
|
-
chrom='all',
|
60
|
-
# ldscore_save_dir=f"{config.workdir}/{config.sample_name}/generate_ldscore",
|
61
|
-
# mkscore_feather_file=latent_to_gene_config.output_feather_path,
|
62
|
-
bfile_root=config.bfile_root,
|
63
|
-
keep_snp_root=config.keep_snp_root,
|
64
|
-
gtf_annotation_file=config.gtffile,
|
65
|
-
spots_per_chunk=5_000,
|
66
|
-
baseline_annotation_dir=config.baseline_annotation_dir,
|
67
|
-
SNP_gene_pair_dir=config.SNP_gene_pair_dir,
|
68
|
-
ldscore_save_format='quick_mode'
|
69
|
-
|
70
|
-
)
|
71
|
-
|
72
|
-
pipeline_start_time = time.time()
|
73
|
-
|
74
|
-
# Step 1: Find latent representations
|
75
|
-
start_time = time.time()
|
76
|
-
logger.info("Step 1: Finding latent representations")
|
77
|
-
if Path(find_latent_config.hdf5_with_latent_path).exists():
|
78
|
-
logger.info(
|
79
|
-
f"Find latent representations already done. Results saved at {find_latent_config.hdf5_with_latent_path}. Skipping...")
|
80
|
-
else:
|
81
|
-
run_find_latent_representation(find_latent_config)
|
82
|
-
end_time = time.time()
|
83
|
-
logger.info(f"Step 1 completed in {format_duration(end_time - start_time)}.")
|
84
|
-
|
85
|
-
# Step 2: Latent to gene
|
86
|
-
start_time = time.time()
|
87
|
-
logger.info("Step 2: Mapping latent representations to genes")
|
88
|
-
if Path(latent_to_gene_config.mkscore_feather_path).exists():
|
89
|
-
logger.info(
|
90
|
-
f"Latent to gene mapping already done. Results saved at {latent_to_gene_config.mkscore_feather_path}. Skipping...")
|
91
|
-
else:
|
92
|
-
run_latent_to_gene(latent_to_gene_config)
|
93
|
-
end_time = time.time()
|
94
|
-
logger.info(f"Step 2 completed in {format_duration(end_time - start_time)}.")
|
95
|
-
|
96
|
-
# Step 3: Generate LDScores
|
97
|
-
start_time = time.time()
|
98
|
-
logger.info("Step 3: Generating LDScores")
|
99
|
-
|
100
|
-
# check if LDscore has been generated by the done file
|
101
|
-
ldsc_done_file = Path(ldscore_config.ldscore_save_dir) / f"{config.sample_name}_generate_ldscore.done"
|
102
|
-
if ldsc_done_file.exists():
|
103
|
-
logger.info(f"Basic LDScore generation already done. Results saved at {ldscore_config.ldscore_save_dir}. Skipping...")
|
104
|
-
else:
|
105
|
-
run_generate_ldscore(ldscore_config)
|
106
|
-
end_time = time.time()
|
107
|
-
logger.info(f"Step 3 completed in {format_duration(end_time - start_time)}.")
|
108
|
-
# create a done file
|
109
|
-
ldsc_done_file.touch()
|
110
|
-
|
111
|
-
# Step 4: Spatial LDSC
|
112
|
-
start_time = time.time()
|
113
|
-
logger.info("Step 4: Running spatial LDSC")
|
114
|
-
|
115
|
-
sumstats_config = config.sumstats_config_dict
|
116
|
-
for trait_name in sumstats_config:
|
117
|
-
logger.info("Running spatial LDSC for trait: %s", trait_name)
|
118
|
-
# detect if the spatial LDSC has been done:
|
119
|
-
spatial_ldsc_result_file = Path(config.ldsc_save_dir) / f"{config.sample_name}_{trait_name}.csv.gz"
|
120
|
-
|
121
|
-
if spatial_ldsc_result_file.exists():
|
122
|
-
logger.info(
|
123
|
-
f"Spatial LDSC already done for trait {trait_name}. Results saved at {spatial_ldsc_result_file}. Skipping...")
|
124
|
-
continue
|
125
|
-
|
126
|
-
spatial_ldsc_config_trait = SpatialLDSCConfig(
|
127
|
-
workdir=config.workdir,
|
128
|
-
sumstats_file=sumstats_config[trait_name],
|
129
|
-
trait_name=trait_name,
|
130
|
-
w_file=config.w_file,
|
131
|
-
sample_name=config.sample_name,
|
132
|
-
# ldscore_save_dir=spatial_ldsc_config.ldscore_save_dir,
|
133
|
-
# ldsc_save_dir=spatial_ldsc_config.ldsc_save_dir,
|
134
|
-
num_processes=config.max_processes,
|
135
|
-
ldscore_save_format='quick_mode',
|
136
|
-
snp_gene_weight_adata_path=config.snp_gene_weight_adata_path,
|
137
|
-
)
|
138
|
-
run_spatial_ldsc(spatial_ldsc_config_trait)
|
139
|
-
end_time = time.time()
|
140
|
-
logger.info(f"Step 4 completed in {format_duration(end_time - start_time)}.")
|
141
|
-
|
142
|
-
# Step 5: Cauchy combination test
|
143
|
-
start_time = time.time()
|
144
|
-
logger.info("Step 6: Running Cauchy combination test")
|
145
|
-
'/storage/yangjianLab/chenwenhao/projects/202312_GPS/test/20240817_vanilla_pipeline_mouse_embryo_v4/E16.5_E1S1.MOSTA/cauchy_combination/E16.5_E1S1.MOSTA_Depression_2023_NatureMed.Cauchy.csv.gz'
|
146
|
-
for trait_name in sumstats_config:
|
147
|
-
# check if the cauchy combination has been done
|
148
|
-
cauchy_result_file = config.get_cauchy_result_file(trait_name)
|
149
|
-
if cauchy_result_file.exists():
|
150
|
-
logger.info(
|
151
|
-
f"Cauchy combination already done for trait {trait_name}. Results saved at {cauchy_result_file}. Skipping...")
|
152
|
-
continue
|
153
|
-
cauchy_config = CauchyCombinationConfig(
|
154
|
-
workdir=config.workdir,
|
155
|
-
sample_name=config.sample_name,
|
156
|
-
annotation=config.annotation,
|
157
|
-
trait_name=trait_name,
|
158
|
-
)
|
159
|
-
run_Cauchy_combination(cauchy_config)
|
160
|
-
end_time = time.time()
|
161
|
-
logger.info(f"Step 5 completed in {format_duration(end_time - start_time)}.")
|
162
|
-
|
163
|
-
# Step 6: Generate final report
|
164
|
-
for trait_name in sumstats_config:
|
165
|
-
logger.info("Running final report generation for trait: %s", trait_name)
|
166
|
-
report_config = ReportConfig(
|
167
|
-
workdir=config.workdir,
|
168
|
-
sample_name=config.sample_name,
|
169
|
-
annotation=config.annotation,
|
170
|
-
trait_name=trait_name,
|
171
|
-
plot_type='all',
|
172
|
-
top_corr_genes=50,
|
173
|
-
selected_genes=None,
|
174
|
-
sumstats_file=sumstats_config[trait_name],
|
175
|
-
)
|
176
|
-
# Create the run parameters dictionary for each trait
|
177
|
-
run_parameter_dict = {
|
178
|
-
"Sample Name": config.sample_name,
|
179
|
-
"Trait Name": trait_name,
|
180
|
-
"Summary Statistics File": sumstats_config[trait_name],
|
181
|
-
"HDF5 Path": config.hdf5_path,
|
182
|
-
"Annotation": config.annotation,
|
183
|
-
"Number of Processes": config.max_processes,
|
184
|
-
"Spatial LDSC Save Directory": config.ldsc_save_dir,
|
185
|
-
"Cauchy Directory": config.cauchy_save_dir,
|
186
|
-
"Report Directory": config.get_report_dir(trait_name),
|
187
|
-
"gsMap Report File": config.get_gsMap_report_file(trait_name),
|
188
|
-
"Gene Diagnostic Info File": config.get_gene_diagnostic_info_save_path(trait_name),
|
189
|
-
"Spending Time": format_duration(time.time() - pipeline_start_time),
|
190
|
-
}
|
191
|
-
|
192
|
-
# Pass the run parameter dictionary to the report generation function
|
193
|
-
run_report(report_config, run_parameters=run_parameter_dict)
|
194
|
-
|
1
|
+
import logging
|
2
|
+
import time
|
3
|
+
from pathlib import Path
|
4
|
+
|
5
|
+
from gsMap.cauchy_combination_test import run_Cauchy_combination
|
6
|
+
from gsMap.config import GenerateLDScoreConfig, SpatialLDSCConfig, LatentToGeneConfig, \
|
7
|
+
FindLatentRepresentationsConfig, CauchyCombinationConfig, RunAllModeConfig, ReportConfig
|
8
|
+
from gsMap.find_latent_representation import run_find_latent_representation
|
9
|
+
from gsMap.generate_ldscore import run_generate_ldscore
|
10
|
+
from gsMap.latent_to_gene import run_latent_to_gene
|
11
|
+
from gsMap.report import run_report
|
12
|
+
from gsMap.spatial_ldsc_multiple_sumstats import run_spatial_ldsc
|
13
|
+
|
14
|
+
|
15
|
+
|
16
|
+
def format_duration(seconds):
    """Format an elapsed time in seconds as ``"<hours>h <minutes>m"``.

    Leftover seconds are dropped (not rounded), so anything under one minute
    is rendered as ``"0h 0m"``.

    Args:
        seconds: Elapsed time in seconds (int or float).

    Returns:
        A string such as ``"1h 59m"``.
    """
    # Differences of wall-clock timestamps can come out slightly negative when
    # the system clock is adjusted; floor division would then render e.g.
    # "-1h 59m", so treat any negative duration as zero.
    seconds = max(seconds, 0)
    hours, remainder = divmod(seconds, 3600)
    return f"{int(hours)}h {int(remainder // 60)}m"
|
20
|
+
|
21
|
+
|
22
|
+
def run_pipeline(config: RunAllModeConfig):
    """Run every gsMap stage in order for one sample, skipping finished work.

    The stages are: (1) find latent representations, (2) latent to gene,
    (3) generate LD scores, (4) spatial LDSC per trait, (5) Cauchy combination
    per trait and (6) report generation per trait. Stages 1-5 are skipped when
    their expected output (or, for stage 3, a ``.done`` marker file) already
    exists; the report stage is always executed.

    Args:
        config: Run-all-mode configuration supplying the work directory,
            sample name, input paths and the trait -> summary-statistics
            mapping (``config.sumstats_config_dict``).
    """
    # Send INFO-level records to a per-sample log file inside the workdir.
    log_file = Path(config.workdir) / config.sample_name / 'gsMap_pipeline.log'
    log_file.parent.mkdir(parents=True, exist_ok=True)
    logging.basicConfig(
        level=logging.INFO,
        format='[{asctime}] {levelname:.5s} | {name} - {message}',
        handlers=[
            logging.FileHandler(log_file),
        ],
        style='{'
    )

    logger = logging.getLogger('gsMap.pipeline')
    logger.info("Starting pipeline with configuration: %s", config)

    # Build the per-stage configurations up front; their derived output paths
    # are what the "already done?" checks below look at.
    find_latent_config = FindLatentRepresentationsConfig(
        workdir=config.workdir,
        input_hdf5_path=config.hdf5_path,
        sample_name=config.sample_name,
        annotation=config.annotation,
        data_layer=config.data_layer
    )

    latent_to_gene_config = LatentToGeneConfig(
        workdir=config.workdir,
        sample_name=config.sample_name,
        annotation=config.annotation,
        latent_representation='latent_GVAE',
        num_neighbour=51,
        num_neighbour_spatial=201,
        homolog_file=config.homolog_file
    )

    ldscore_config = GenerateLDScoreConfig(
        workdir=config.workdir,
        sample_name=config.sample_name,
        chrom='all',
        bfile_root=config.bfile_root,
        keep_snp_root=config.keep_snp_root,
        gtf_annotation_file=config.gtffile,
        spots_per_chunk=5_000,
        baseline_annotation_dir=config.baseline_annotation_dir,
        SNP_gene_pair_dir=config.SNP_gene_pair_dir,
        ldscore_save_format='quick_mode'
    )

    # Reference point for the total "Spending Time" shown in each report.
    pipeline_start_time = time.time()

    # Step 1: Find latent representations
    start_time = time.time()
    logger.info("Step 1: Finding latent representations")
    if Path(find_latent_config.hdf5_with_latent_path).exists():
        logger.info(
            f"Find latent representations already done. Results saved at {find_latent_config.hdf5_with_latent_path}. Skipping...")
    else:
        run_find_latent_representation(find_latent_config)
    end_time = time.time()
    logger.info(f"Step 1 completed in {format_duration(end_time - start_time)}.")

    # Step 2: Latent to gene
    start_time = time.time()
    logger.info("Step 2: Mapping latent representations to genes")
    if Path(latent_to_gene_config.mkscore_feather_path).exists():
        logger.info(
            f"Latent to gene mapping already done. Results saved at {latent_to_gene_config.mkscore_feather_path}. Skipping...")
    else:
        run_latent_to_gene(latent_to_gene_config)
    end_time = time.time()
    logger.info(f"Step 2 completed in {format_duration(end_time - start_time)}.")

    # Step 3: Generate LDScores
    start_time = time.time()
    logger.info("Step 3: Generating LDScores")

    # Completion of this stage is tracked with an explicit marker file rather
    # than by probing for a particular output file.
    ldsc_done_file = Path(ldscore_config.ldscore_save_dir) / f"{config.sample_name}_generate_ldscore.done"
    if ldsc_done_file.exists():
        logger.info(f"Basic LDScore generation already done. Results saved at {ldscore_config.ldscore_save_dir}. Skipping...")
    else:
        run_generate_ldscore(ldscore_config)
        # Write the marker only after generation returned without raising.
        # NOTE(review): nesting was reconstructed from a flattened listing;
        # confirm the marker is meant to be written only on a fresh run.
        ldsc_done_file.touch()
    end_time = time.time()
    logger.info(f"Step 3 completed in {format_duration(end_time - start_time)}.")

    # Step 4: Spatial LDSC
    start_time = time.time()
    logger.info("Step 4: Running spatial LDSC")

    sumstats_config = config.sumstats_config_dict
    for trait_name in sumstats_config:
        logger.info("Running spatial LDSC for trait: %s", trait_name)
        # A trait is considered done when its result file is already present.
        spatial_ldsc_result_file = Path(config.ldsc_save_dir) / f"{config.sample_name}_{trait_name}.csv.gz"

        if spatial_ldsc_result_file.exists():
            logger.info(
                f"Spatial LDSC already done for trait {trait_name}. Results saved at {spatial_ldsc_result_file}. Skipping...")
            continue

        spatial_ldsc_config_trait = SpatialLDSCConfig(
            workdir=config.workdir,
            sumstats_file=sumstats_config[trait_name],
            trait_name=trait_name,
            w_file=config.w_file,
            sample_name=config.sample_name,
            num_processes=config.max_processes,
            ldscore_save_format='quick_mode',
            snp_gene_weight_adata_path=config.snp_gene_weight_adata_path,
        )
        run_spatial_ldsc(spatial_ldsc_config_trait)
    end_time = time.time()
    logger.info(f"Step 4 completed in {format_duration(end_time - start_time)}.")

    # Step 5: Cauchy combination test
    start_time = time.time()
    logger.info("Step 5: Running Cauchy combination test")
    for trait_name in sumstats_config:
        # Skip traits whose Cauchy result file already exists.
        cauchy_result_file = config.get_cauchy_result_file(trait_name)
        if cauchy_result_file.exists():
            logger.info(
                f"Cauchy combination already done for trait {trait_name}. Results saved at {cauchy_result_file}. Skipping...")
            continue
        cauchy_config = CauchyCombinationConfig(
            workdir=config.workdir,
            sample_name=config.sample_name,
            annotation=config.annotation,
            trait_name=trait_name,
        )
        run_Cauchy_combination(cauchy_config)
    end_time = time.time()
    logger.info(f"Step 5 completed in {format_duration(end_time - start_time)}.")

    # Step 6: Generate final report (always executed, once per trait)
    for trait_name in sumstats_config:
        logger.info("Running final report generation for trait: %s", trait_name)
        report_config = ReportConfig(
            workdir=config.workdir,
            sample_name=config.sample_name,
            annotation=config.annotation,
            trait_name=trait_name,
            plot_type='all',
            top_corr_genes=50,
            selected_genes=None,
            sumstats_file=sumstats_config[trait_name],
        )
        # Summary of this run's settings, handed to the report for this trait.
        run_parameter_dict = {
            "Sample Name": config.sample_name,
            "Trait Name": trait_name,
            "Summary Statistics File": sumstats_config[trait_name],
            "HDF5 Path": config.hdf5_path,
            "Annotation": config.annotation,
            "Number of Processes": config.max_processes,
            "Spatial LDSC Save Directory": config.ldsc_save_dir,
            "Cauchy Directory": config.cauchy_save_dir,
            "Report Directory": config.get_report_dir(trait_name),
            "gsMap Report File": config.get_gsMap_report_file(trait_name),
            "Gene Diagnostic Info File": config.get_gene_diagnostic_info_save_path(trait_name),
            "Spending Time": format_duration(time.time() - pipeline_start_time),
        }

        # Pass the run parameter dictionary to the report generation function
        run_report(report_config, run_parameters=run_parameter_dict)

    logger.info("Pipeline completed successfully.")
|
gsMap/setup.py
CHANGED
File without changes
|