gsMap 1.71.2__py3-none-any.whl → 1.72.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gsMap/GNN/adjacency_matrix.py +25 -27
- gsMap/GNN/model.py +9 -7
- gsMap/GNN/train.py +8 -11
- gsMap/__init__.py +3 -3
- gsMap/__main__.py +3 -2
- gsMap/cauchy_combination_test.py +75 -72
- gsMap/config.py +822 -316
- gsMap/create_slice_mean.py +154 -0
- gsMap/diagnosis.py +179 -101
- gsMap/find_latent_representation.py +28 -26
- gsMap/format_sumstats.py +233 -201
- gsMap/generate_ldscore.py +353 -209
- gsMap/latent_to_gene.py +92 -60
- gsMap/main.py +23 -14
- gsMap/report.py +39 -25
- gsMap/run_all_mode.py +86 -46
- gsMap/setup.py +1 -1
- gsMap/spatial_ldsc_multiple_sumstats.py +154 -80
- gsMap/utils/generate_r2_matrix.py +173 -140
- gsMap/utils/jackknife.py +84 -80
- gsMap/utils/manhattan_plot.py +180 -207
- gsMap/utils/regression_read.py +105 -122
- gsMap/visualize.py +82 -64
- {gsmap-1.71.2.dist-info → gsmap-1.72.3.dist-info}/METADATA +21 -6
- gsmap-1.72.3.dist-info/RECORD +31 -0
- {gsmap-1.71.2.dist-info → gsmap-1.72.3.dist-info}/WHEEL +1 -1
- gsMap/utils/make_annotations.py +0 -518
- gsmap-1.71.2.dist-info/RECORD +0 -31
- {gsmap-1.71.2.dist-info → gsmap-1.72.3.dist-info}/LICENSE +0 -0
- {gsmap-1.71.2.dist-info → gsmap-1.72.3.dist-info}/entry_points.txt +0 -0
gsMap/config.py
CHANGED
@@ -1,75 +1,118 @@
|
|
1
|
-
import sys
|
2
1
|
import argparse
|
2
|
+
import dataclasses
|
3
3
|
import logging
|
4
|
+
import sys
|
4
5
|
from collections import OrderedDict, namedtuple
|
6
|
+
from collections.abc import Callable
|
5
7
|
from dataclasses import dataclass
|
8
|
+
from functools import wraps
|
6
9
|
from pathlib import Path
|
7
10
|
from pprint import pprint
|
8
|
-
from typing import
|
9
|
-
|
10
|
-
from functools import wraps
|
11
|
+
from typing import Literal
|
12
|
+
|
11
13
|
import pyfiglet
|
14
|
+
import yaml
|
12
15
|
|
13
16
|
from gsMap.__init__ import __version__
|
14
17
|
|
15
18
|
# Global registry to hold functions
|
16
19
|
cli_function_registry = OrderedDict()
|
17
|
-
subcommand = namedtuple(
|
20
|
+
subcommand = namedtuple("subcommand", ["name", "func", "add_args_function", "description"])
|
18
21
|
|
19
22
|
|
20
23
|
def get_gsMap_logger(logger_name):
|
21
24
|
logger = logging.getLogger(logger_name)
|
22
25
|
logger.setLevel(logging.DEBUG)
|
23
26
|
handler = logging.StreamHandler()
|
24
|
-
handler.setFormatter(
|
25
|
-
|
27
|
+
handler.setFormatter(
|
28
|
+
logging.Formatter("[{asctime}] {levelname:.5s} | {name} - {message}", style="{")
|
29
|
+
)
|
26
30
|
logger.addHandler(handler)
|
27
31
|
return logger
|
28
32
|
|
29
|
-
|
33
|
+
|
34
|
+
logger = get_gsMap_logger("gsMap")
|
35
|
+
|
30
36
|
|
31
37
|
# Decorator to register functions for cli parsing
|
32
38
|
def register_cli(name: str, description: str, add_args_function: Callable) -> Callable:
|
33
39
|
def decorator(func: Callable) -> Callable:
|
34
40
|
def wrapper(*args, **kwargs):
|
35
|
-
name.replace(
|
36
|
-
gsMap_main_logo = pyfiglet.figlet_format(
|
41
|
+
name.replace("_", " ")
|
42
|
+
gsMap_main_logo = pyfiglet.figlet_format(
|
43
|
+
"gsMap",
|
44
|
+
font="doom",
|
45
|
+
width=80,
|
46
|
+
justify="center",
|
47
|
+
).rstrip()
|
37
48
|
print(gsMap_main_logo, flush=True)
|
38
|
-
version_number =
|
49
|
+
version_number = "Version: " + __version__
|
39
50
|
print(version_number.center(80), flush=True)
|
40
|
-
print(
|
51
|
+
print("=" * 80, flush=True)
|
41
52
|
logger.info(f"Running {name}...")
|
42
53
|
func(*args, **kwargs)
|
43
54
|
logger.info(f"Finished running {name}.")
|
44
55
|
|
45
|
-
cli_function_registry[name] = subcommand(
|
46
|
-
|
56
|
+
cli_function_registry[name] = subcommand(
|
57
|
+
name=name, func=wrapper, add_args_function=add_args_function, description=description
|
58
|
+
)
|
47
59
|
return wrapper
|
48
60
|
|
49
61
|
return decorator
|
50
62
|
|
63
|
+
|
51
64
|
def add_shared_args(parser):
|
52
|
-
parser.add_argument(
|
53
|
-
|
65
|
+
parser.add_argument(
|
66
|
+
"--workdir", type=str, required=True, help="Path to the working directory."
|
67
|
+
)
|
68
|
+
parser.add_argument("--sample_name", type=str, required=True, help="Name of the sample.")
|
69
|
+
|
54
70
|
|
55
71
|
def add_find_latent_representations_args(parser):
|
56
72
|
add_shared_args(parser)
|
57
|
-
parser.add_argument(
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
parser.add_argument(
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
parser.add_argument(
|
71
|
-
parser.add_argument(
|
72
|
-
|
73
|
+
parser.add_argument(
|
74
|
+
"--input_hdf5_path", required=True, type=str, help="Path to the input HDF5 file."
|
75
|
+
)
|
76
|
+
parser.add_argument(
|
77
|
+
"--annotation", required=True, type=str, help="Name of the annotation in adata.obs to use."
|
78
|
+
)
|
79
|
+
parser.add_argument(
|
80
|
+
"--data_layer",
|
81
|
+
type=str,
|
82
|
+
default="counts",
|
83
|
+
required=True,
|
84
|
+
help='Data layer for gene expression (e.g., "count", "counts", "log1p").',
|
85
|
+
)
|
86
|
+
parser.add_argument("--epochs", type=int, default=300, help="Number of training epochs.")
|
87
|
+
parser.add_argument(
|
88
|
+
"--feat_hidden1", type=int, default=256, help="Neurons in the first hidden layer."
|
89
|
+
)
|
90
|
+
parser.add_argument(
|
91
|
+
"--feat_hidden2", type=int, default=128, help="Neurons in the second hidden layer."
|
92
|
+
)
|
93
|
+
parser.add_argument(
|
94
|
+
"--gat_hidden1", type=int, default=64, help="Units in the first GAT hidden layer."
|
95
|
+
)
|
96
|
+
parser.add_argument(
|
97
|
+
"--gat_hidden2", type=int, default=30, help="Units in the second GAT hidden layer."
|
98
|
+
)
|
99
|
+
parser.add_argument("--p_drop", type=float, default=0.1, help="Dropout rate.")
|
100
|
+
parser.add_argument("--gat_lr", type=float, default=0.001, help="Learning rate for the GAT.")
|
101
|
+
parser.add_argument("--n_neighbors", type=int, default=11, help="Number of neighbors for GAT.")
|
102
|
+
parser.add_argument(
|
103
|
+
"--n_comps", type=int, default=300, help="Number of principal components for PCA."
|
104
|
+
)
|
105
|
+
parser.add_argument(
|
106
|
+
"--weighted_adj", action="store_true", help="Use weighted adjacency in GAT."
|
107
|
+
)
|
108
|
+
parser.add_argument(
|
109
|
+
"--convergence_threshold", type=float, default=1e-4, help="Threshold for convergence."
|
110
|
+
)
|
111
|
+
parser.add_argument(
|
112
|
+
"--hierarchically",
|
113
|
+
action="store_true",
|
114
|
+
help="Enable hierarchical latent representation finding.",
|
115
|
+
)
|
73
116
|
|
74
117
|
|
75
118
|
def chrom_choice(value):
|
@@ -77,10 +120,12 @@ def chrom_choice(value):
|
|
77
120
|
ivalue = int(value)
|
78
121
|
if 1 <= ivalue <= 22:
|
79
122
|
return ivalue
|
80
|
-
elif value.lower() ==
|
123
|
+
elif value.lower() == "all":
|
81
124
|
return value
|
82
125
|
else:
|
83
|
-
raise argparse.ArgumentTypeError(
|
126
|
+
raise argparse.ArgumentTypeError(
|
127
|
+
f"'{value}' is an invalid chromosome choice. Choose from 1-22 or 'all'."
|
128
|
+
)
|
84
129
|
|
85
130
|
|
86
131
|
def filter_args_for_dataclass(args_dict, data_class: dataclass):
|
@@ -89,7 +134,7 @@ def filter_args_for_dataclass(args_dict, data_class: dataclass):
|
|
89
134
|
|
90
135
|
def get_dataclass_from_parser(args: argparse.Namespace, data_class: dataclass):
|
91
136
|
remain_kwargs = filter_args_for_dataclass(vars(args), data_class)
|
92
|
-
print(f
|
137
|
+
print(f"Using the following arguments for {data_class.__name__}:", flush=True)
|
93
138
|
pprint(remain_kwargs, indent=4)
|
94
139
|
sys.stdout.flush()
|
95
140
|
return data_class(**remain_kwargs)
|
@@ -97,178 +142,423 @@ def get_dataclass_from_parser(args: argparse.Namespace, data_class: dataclass):
|
|
97
142
|
|
98
143
|
def add_latent_to_gene_args(parser):
|
99
144
|
add_shared_args(parser)
|
100
|
-
parser.add_argument('--annotation', type=str, help='Name of the annotation in adata.obs to use. (optional).')
|
101
|
-
parser.add_argument('--no_expression_fraction', action='store_true', help='Skip expression fraction filtering.')
|
102
|
-
parser.add_argument('--latent_representation', type=str, choices=['latent_GVAE', 'latent_PCA'], default='latent_GVAE',
|
103
|
-
help='Type of latent representation.')
|
104
|
-
parser.add_argument('--num_neighbour', type=int, default=21, help='Number of neighbors.')
|
105
|
-
parser.add_argument('--num_neighbour_spatial', type=int, default=101, help='Number of spatial neighbors.')
|
106
|
-
# parser.add_argument('--species', type=str, help='Species name for homolog gene mapping (optional).')
|
107
|
-
parser.add_argument('--homolog_file', type=str, help='Path to homologous gene conversion file (optional).')
|
108
145
|
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
parser.add_argument(
|
116
|
-
|
117
|
-
|
118
|
-
parser.add_argument(
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
parser.add_argument(
|
125
|
-
parser.add_argument(
|
146
|
+
parser.add_argument(
|
147
|
+
"--input_hdf5_path",
|
148
|
+
type=str,
|
149
|
+
default=None,
|
150
|
+
help="Path to the input HDF5 file with latent representations, if --latent_representation is specified.",
|
151
|
+
)
|
152
|
+
parser.add_argument(
|
153
|
+
"--no_expression_fraction", action="store_true", help="Skip expression fraction filtering."
|
154
|
+
)
|
155
|
+
parser.add_argument(
|
156
|
+
"--latent_representation",
|
157
|
+
type=str,
|
158
|
+
default=None,
|
159
|
+
help="Type of latent representation. This should exist in the h5ad obsm.",
|
160
|
+
)
|
161
|
+
parser.add_argument("--num_neighbour", type=int, default=21, help="Number of neighbors.")
|
162
|
+
parser.add_argument(
|
163
|
+
"--num_neighbour_spatial", type=int, default=101, help="Number of spatial neighbors."
|
164
|
+
)
|
165
|
+
parser.add_argument(
|
166
|
+
"--homolog_file",
|
167
|
+
type=str,
|
168
|
+
default=None,
|
169
|
+
help="Path to homologous gene conversion file (optional).",
|
170
|
+
)
|
171
|
+
parser.add_argument(
|
172
|
+
"--gM_slices", type=str, default=None, help="Path to the slice mean file (optional)."
|
173
|
+
)
|
174
|
+
parser.add_argument(
|
175
|
+
"--annotation",
|
176
|
+
type=str,
|
177
|
+
default=None,
|
178
|
+
help="Name of the annotation in adata.obs to use (optional).",
|
179
|
+
)
|
126
180
|
|
127
181
|
|
128
|
-
def
|
182
|
+
def add_generate_ldscore_args(parser):
|
129
183
|
add_shared_args(parser)
|
130
|
-
parser.add_argument(
|
131
|
-
parser.add_argument(
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
parser.add_argument(
|
184
|
+
parser.add_argument("--chrom", type=str, required=True, help='Chromosome id (1-22) or "all".')
|
185
|
+
parser.add_argument(
|
186
|
+
"--bfile_root",
|
187
|
+
type=str,
|
188
|
+
required=True,
|
189
|
+
help="Root path for genotype plink bfiles (.bim, .bed, .fam).",
|
190
|
+
)
|
191
|
+
parser.add_argument(
|
192
|
+
"--keep_snp_root", type=str, required=True, help="Root path for SNP files."
|
193
|
+
)
|
194
|
+
parser.add_argument(
|
195
|
+
"--gtf_annotation_file", type=str, required=True, help="Path to GTF annotation file."
|
196
|
+
)
|
197
|
+
parser.add_argument(
|
198
|
+
"--gene_window_size", type=int, default=50000, help="Gene window size in base pairs."
|
199
|
+
)
|
200
|
+
parser.add_argument(
|
201
|
+
"--enhancer_annotation_file", type=str, help="Path to enhancer annotation file (optional)."
|
202
|
+
)
|
203
|
+
parser.add_argument(
|
204
|
+
"--snp_multiple_enhancer_strategy",
|
205
|
+
type=str,
|
206
|
+
choices=["max_mkscore", "nearest_TSS"],
|
207
|
+
default="max_mkscore",
|
208
|
+
help="Strategy for handling multiple enhancers per SNP.",
|
209
|
+
)
|
210
|
+
parser.add_argument(
|
211
|
+
"--gene_window_enhancer_priority",
|
212
|
+
type=str,
|
213
|
+
choices=["gene_window_first", "enhancer_first", "enhancer_only"],
|
214
|
+
help="Priority between gene window and enhancer annotations.",
|
215
|
+
)
|
216
|
+
parser.add_argument(
|
217
|
+
"--spots_per_chunk", type=int, default=1000, help="Number of spots per chunk."
|
218
|
+
)
|
219
|
+
parser.add_argument("--ld_wind", type=int, default=1, help="LD window size.")
|
220
|
+
parser.add_argument(
|
221
|
+
"--ld_unit",
|
222
|
+
type=str,
|
223
|
+
choices=["SNP", "KB", "CM"],
|
224
|
+
default="CM",
|
225
|
+
help="Unit for LD window.",
|
226
|
+
)
|
227
|
+
parser.add_argument(
|
228
|
+
"--additional_baseline_annotation",
|
229
|
+
type=str,
|
230
|
+
default=None,
|
231
|
+
help="Path of additional baseline annotations",
|
232
|
+
)
|
138
233
|
|
139
234
|
|
140
235
|
def add_spatial_ldsc_args(parser):
|
141
236
|
add_shared_args(parser)
|
142
|
-
parser.add_argument(
|
143
|
-
|
144
|
-
|
145
|
-
parser.add_argument(
|
146
|
-
|
147
|
-
|
148
|
-
parser.add_argument(
|
237
|
+
parser.add_argument(
|
238
|
+
"--sumstats_file", type=str, required=True, help="Path to GWAS summary statistics file."
|
239
|
+
)
|
240
|
+
parser.add_argument(
|
241
|
+
"--w_file", type=str, required=True, help="Path to regression weight file."
|
242
|
+
)
|
243
|
+
parser.add_argument(
|
244
|
+
"--trait_name", type=str, required=True, help="Name of the trait being analyzed."
|
245
|
+
)
|
246
|
+
parser.add_argument(
|
247
|
+
"--n_blocks", type=int, default=200, help="Number of blocks for jackknife resampling."
|
248
|
+
)
|
249
|
+
parser.add_argument(
|
250
|
+
"--chisq_max", type=int, help="Maximum chi-square value for filtering SNPs."
|
251
|
+
)
|
252
|
+
parser.add_argument(
|
253
|
+
"--num_processes", type=int, default=4, help="Number of processes for parallel computing."
|
254
|
+
)
|
255
|
+
parser.add_argument(
|
256
|
+
"--use_additional_baseline_annotation",
|
257
|
+
type=bool,
|
258
|
+
nargs="?",
|
259
|
+
const=True,
|
260
|
+
default=True,
|
261
|
+
help="Use additional baseline annotations when provided",
|
262
|
+
)
|
149
263
|
|
150
264
|
|
151
265
|
def add_Cauchy_combination_args(parser):
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
parser.add_argument(
|
156
|
-
|
266
|
+
parser.add_argument(
|
267
|
+
"--workdir", type=str, required=True, help="Path to the working directory."
|
268
|
+
)
|
269
|
+
parser.add_argument("--sample_name", type=str, required=False, help="Name of the sample.")
|
270
|
+
|
271
|
+
parser.add_argument(
|
272
|
+
"--trait_name", type=str, required=True, help="Name of the trait being analyzed."
|
273
|
+
)
|
274
|
+
parser.add_argument(
|
275
|
+
"--annotation", type=str, required=True, help="Name of the annotation in adata.obs to use."
|
276
|
+
)
|
277
|
+
|
278
|
+
parser.add_argument(
|
279
|
+
"--sample_name_list",
|
280
|
+
type=str,
|
281
|
+
nargs="+",
|
282
|
+
required=False,
|
283
|
+
help="List of sample names to process. Provide as a space-separated list.",
|
284
|
+
)
|
285
|
+
parser.add_argument(
|
286
|
+
"--output_file",
|
287
|
+
type=str,
|
288
|
+
required=False,
|
289
|
+
help="Path to save the combined Cauchy results. Required when using multiple samples.",
|
290
|
+
)
|
157
291
|
|
158
292
|
|
159
293
|
def add_report_args(parser):
|
160
294
|
add_shared_args(parser)
|
161
|
-
parser.add_argument(
|
162
|
-
|
295
|
+
parser.add_argument(
|
296
|
+
"--trait_name",
|
297
|
+
type=str,
|
298
|
+
required=True,
|
299
|
+
help="Name of the trait to generate the report for.",
|
300
|
+
)
|
301
|
+
parser.add_argument("--annotation", type=str, required=True, help="Annotation layer name.")
|
163
302
|
# parser.add_argument('--plot_type', type=str, choices=['manhattan', 'GSS', 'gsMap', 'all'], default='all',
|
164
303
|
# help="Type of diagnostic plot to generate. Choose from 'manhattan', 'GSS', 'gsMap', or 'all'.")
|
165
|
-
parser.add_argument(
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
304
|
+
parser.add_argument(
|
305
|
+
"--top_corr_genes", type=int, default=50, help="Number of top correlated genes to display."
|
306
|
+
)
|
307
|
+
parser.add_argument(
|
308
|
+
"--selected_genes",
|
309
|
+
type=str,
|
310
|
+
nargs="*",
|
311
|
+
help="List of specific genes to include in the report (optional).",
|
312
|
+
)
|
313
|
+
parser.add_argument(
|
314
|
+
"--sumstats_file", type=str, required=True, help="Path to GWAS summary statistics file."
|
315
|
+
)
|
170
316
|
|
171
317
|
# Optional arguments for customization
|
172
|
-
parser.add_argument(
|
173
|
-
|
174
|
-
|
175
|
-
parser.add_argument(
|
176
|
-
|
318
|
+
parser.add_argument(
|
319
|
+
"--fig_width", type=int, default=None, help="Width of the generated figures in pixels."
|
320
|
+
)
|
321
|
+
parser.add_argument(
|
322
|
+
"--fig_height", type=int, default=None, help="Height of the generated figures in pixels."
|
323
|
+
)
|
324
|
+
parser.add_argument("--point_size", type=int, default=None, help="Point size for the figures.")
|
325
|
+
parser.add_argument(
|
326
|
+
"--fig_style",
|
327
|
+
type=str,
|
328
|
+
default="light",
|
329
|
+
choices=["dark", "light"],
|
330
|
+
help="Style of the generated figures.",
|
331
|
+
)
|
332
|
+
|
333
|
+
|
334
|
+
def add_create_slice_mean_args(parser):
|
335
|
+
parser.add_argument(
|
336
|
+
"--sample_name_list",
|
337
|
+
type=str,
|
338
|
+
nargs="+",
|
339
|
+
required=True,
|
340
|
+
help="List of sample names to process. Provide as a space-separated list.",
|
341
|
+
)
|
342
|
+
|
343
|
+
parser.add_argument(
|
344
|
+
"--h5ad_list",
|
345
|
+
type=str,
|
346
|
+
nargs="+",
|
347
|
+
help="List of h5ad file paths corresponding to the sample names. Provide as a space-separated list.",
|
348
|
+
)
|
349
|
+
parser.add_argument(
|
350
|
+
"--h5ad_yaml",
|
351
|
+
type=str,
|
352
|
+
default=None,
|
353
|
+
help="Path to the YAML file containing sample names and associated h5ad file paths",
|
354
|
+
)
|
355
|
+
parser.add_argument(
|
356
|
+
"--slice_mean_output_file",
|
357
|
+
type=str,
|
358
|
+
required=True,
|
359
|
+
help="Path to the output file for the slice mean",
|
360
|
+
)
|
361
|
+
parser.add_argument(
|
362
|
+
"--homolog_file", type=str, help="Path to homologous gene conversion file (optional)."
|
363
|
+
)
|
364
|
+
parser.add_argument(
|
365
|
+
"--data_layer",
|
366
|
+
type=str,
|
367
|
+
default="counts",
|
368
|
+
required=True,
|
369
|
+
help='Data layer for gene expression (e.g., "count", "counts", "log1p").',
|
370
|
+
)
|
371
|
+
|
177
372
|
|
178
373
|
def add_format_sumstats_args(parser):
|
179
374
|
# Required arguments
|
180
|
-
parser.add_argument(
|
181
|
-
|
182
|
-
|
183
|
-
|
375
|
+
parser.add_argument("--sumstats", required=True, type=str, help="Path to gwas summary data")
|
376
|
+
parser.add_argument(
|
377
|
+
"--out", required=True, type=str, help="Path to save the formatted gwas data"
|
378
|
+
)
|
184
379
|
|
185
380
|
# Arguments for specify column name
|
186
|
-
parser.add_argument(
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
parser.add_argument(
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
parser.add_argument(
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
parser.add_argument(
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
381
|
+
parser.add_argument(
|
382
|
+
"--snp",
|
383
|
+
default=None,
|
384
|
+
type=str,
|
385
|
+
help="Name of snp column (if not a name that gsMap understands)",
|
386
|
+
)
|
387
|
+
parser.add_argument(
|
388
|
+
"--a1",
|
389
|
+
default=None,
|
390
|
+
type=str,
|
391
|
+
help="Name of effect allele column (if not a name that gsMap understands)",
|
392
|
+
)
|
393
|
+
parser.add_argument(
|
394
|
+
"--a2",
|
395
|
+
default=None,
|
396
|
+
type=str,
|
397
|
+
help="Name of none-effect allele column (if not a name that gsMap understands)",
|
398
|
+
)
|
399
|
+
parser.add_argument(
|
400
|
+
"--info",
|
401
|
+
default=None,
|
402
|
+
type=str,
|
403
|
+
help="Name of info column (if not a name that gsMap understands)",
|
404
|
+
)
|
405
|
+
parser.add_argument(
|
406
|
+
"--beta",
|
407
|
+
default=None,
|
408
|
+
type=str,
|
409
|
+
help="Name of gwas beta column (if not a name that gsMap understands).",
|
410
|
+
)
|
411
|
+
parser.add_argument(
|
412
|
+
"--se",
|
413
|
+
default=None,
|
414
|
+
type=str,
|
415
|
+
help="Name of gwas standar error of beta column (if not a name that gsMap understands)",
|
416
|
+
)
|
417
|
+
parser.add_argument(
|
418
|
+
"--p",
|
419
|
+
default=None,
|
420
|
+
type=str,
|
421
|
+
help="Name of p-value column (if not a name that gsMap understands)",
|
422
|
+
)
|
423
|
+
parser.add_argument(
|
424
|
+
"--frq",
|
425
|
+
default=None,
|
426
|
+
type=str,
|
427
|
+
help="Name of A1 ferquency column (if not a name that gsMap understands)",
|
428
|
+
)
|
429
|
+
parser.add_argument(
|
430
|
+
"--n",
|
431
|
+
default=None,
|
432
|
+
type=str,
|
433
|
+
help="Name of sample size column (if not a name that gsMap understands)",
|
434
|
+
)
|
435
|
+
parser.add_argument(
|
436
|
+
"--z",
|
437
|
+
default=None,
|
438
|
+
type=str,
|
439
|
+
help="Name of gwas Z-statistics column (if not a name that gsMap understands)",
|
440
|
+
)
|
441
|
+
parser.add_argument(
|
442
|
+
"--OR",
|
443
|
+
default=None,
|
444
|
+
type=str,
|
445
|
+
help="Name of gwas OR column (if not a name that gsMap understands)",
|
446
|
+
)
|
447
|
+
parser.add_argument(
|
448
|
+
"--se_OR",
|
449
|
+
default=None,
|
450
|
+
type=str,
|
451
|
+
help="Name of standar error of OR column (if not a name that gsMap understands)",
|
452
|
+
)
|
210
453
|
|
211
454
|
# Arguments for convert SNP (chr, pos) to rsid
|
212
|
-
parser.add_argument(
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
parser.add_argument(
|
219
|
-
|
455
|
+
parser.add_argument(
|
456
|
+
"--chr",
|
457
|
+
default="Chr",
|
458
|
+
type=str,
|
459
|
+
help="Name of SNP chromosome column (if not a name that gsMap understands)",
|
460
|
+
)
|
461
|
+
parser.add_argument(
|
462
|
+
"--pos",
|
463
|
+
default="Pos",
|
464
|
+
type=str,
|
465
|
+
help="Name of SNP positions column (if not a name that gsMap understands)",
|
466
|
+
)
|
467
|
+
parser.add_argument("--dbsnp", default=None, type=str, help="Path to reference dnsnp file")
|
468
|
+
parser.add_argument(
|
469
|
+
"--chunksize", default=1e6, type=int, help="Chunk size for loading dbsnp file"
|
470
|
+
)
|
220
471
|
|
221
472
|
# Arguments for output format and quality
|
222
|
-
parser.add_argument(
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
473
|
+
parser.add_argument(
|
474
|
+
"--format",
|
475
|
+
default="gsMap",
|
476
|
+
type=str,
|
477
|
+
help="Format of output data",
|
478
|
+
choices=["gsMap", "COJO"],
|
479
|
+
)
|
480
|
+
parser.add_argument("--info_min", default=0.9, type=float, help="Minimum INFO score.")
|
481
|
+
parser.add_argument("--maf_min", default=0.01, type=float, help="Minimum MAF.")
|
482
|
+
parser.add_argument(
|
483
|
+
"--keep_chr_pos",
|
484
|
+
action="store_true",
|
485
|
+
default=False,
|
486
|
+
help="Keep SNP chromosome and position columns in the output data",
|
487
|
+
)
|
488
|
+
|
230
489
|
|
231
490
|
def add_run_all_mode_args(parser):
|
232
491
|
add_shared_args(parser)
|
233
492
|
|
234
493
|
# Required paths and configurations
|
235
|
-
parser.add_argument(
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
parser.add_argument(
|
242
|
-
|
494
|
+
parser.add_argument(
|
495
|
+
"--gsMap_resource_dir",
|
496
|
+
type=str,
|
497
|
+
required=True,
|
498
|
+
help="Directory containing gsMap resources (e.g., genome annotations, LD reference panel, etc.).",
|
499
|
+
)
|
500
|
+
parser.add_argument(
|
501
|
+
"--hdf5_path",
|
502
|
+
type=str,
|
503
|
+
required=True,
|
504
|
+
help="Path to the input spatial transcriptomics data (H5AD format).",
|
505
|
+
)
|
506
|
+
parser.add_argument(
|
507
|
+
"--annotation", type=str, required=True, help="Name of the annotation in adata.obs to use."
|
508
|
+
)
|
509
|
+
parser.add_argument(
|
510
|
+
"--data_layer",
|
511
|
+
type=str,
|
512
|
+
default="counts",
|
513
|
+
required=True,
|
514
|
+
help='Data layer for gene expression (e.g., "count", "counts", "log1p").',
|
515
|
+
)
|
243
516
|
|
244
517
|
# GWAS Data Parameters
|
245
|
-
parser.add_argument(
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
518
|
+
parser.add_argument(
|
519
|
+
"--trait_name",
|
520
|
+
type=str,
|
521
|
+
help="Name of the trait for GWAS analysis (required if sumstats_file is provided).",
|
522
|
+
)
|
523
|
+
parser.add_argument(
|
524
|
+
"--sumstats_file",
|
525
|
+
type=str,
|
526
|
+
help="Path to GWAS summary statistics file. Either sumstats_file or sumstats_config_file is required.",
|
527
|
+
)
|
528
|
+
parser.add_argument(
|
529
|
+
"--sumstats_config_file",
|
530
|
+
type=str,
|
531
|
+
help="Path to GWAS summary statistics config file. Either sumstats_file or sumstats_config_file is required.",
|
532
|
+
)
|
250
533
|
|
251
534
|
# Homolog Data Parameters
|
252
|
-
parser.add_argument(
|
253
|
-
|
535
|
+
parser.add_argument(
|
536
|
+
"--homolog_file",
|
537
|
+
type=str,
|
538
|
+
help="Path to homologous gene for converting gene names from different species to human (optional, used for cross-species analysis).",
|
539
|
+
)
|
254
540
|
|
255
541
|
# Maximum number of processes
|
256
|
-
parser.add_argument(
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
542
|
+
parser.add_argument(
|
543
|
+
"--max_processes",
|
544
|
+
type=int,
|
545
|
+
default=10,
|
546
|
+
help="Maximum number of processes for parallel execution.",
|
547
|
+
)
|
548
|
+
|
549
|
+
parser.add_argument(
|
550
|
+
"--latent_representation",
|
551
|
+
type=str,
|
552
|
+
default=None,
|
553
|
+
help="Type of latent representation. This should exist in the h5ad obsm.",
|
554
|
+
)
|
555
|
+
parser.add_argument("--num_neighbour", type=int, default=21, help="Number of neighbors.")
|
556
|
+
parser.add_argument(
|
557
|
+
"--num_neighbour_spatial", type=int, default=101, help="Number of spatial neighbors."
|
558
|
+
)
|
559
|
+
parser.add_argument(
|
560
|
+
"--gM_slices", type=str, default=None, help="Path to the slice mean file (optional)."
|
561
|
+
)
|
272
562
|
|
273
563
|
|
274
564
|
def ensure_path_exists(func):
|
@@ -288,75 +578,136 @@ def ensure_path_exists(func):
|
|
288
578
|
@dataclass
|
289
579
|
class ConfigWithAutoPaths:
|
290
580
|
workdir: str
|
291
|
-
sample_name: str
|
581
|
+
sample_name: str | None
|
292
582
|
|
293
583
|
def __post_init__(self):
|
294
584
|
if self.workdir is None:
|
295
|
-
raise ValueError(
|
585
|
+
raise ValueError("workdir must be provided.")
|
296
586
|
|
297
587
|
@property
|
298
588
|
@ensure_path_exists
|
299
589
|
def hdf5_with_latent_path(self) -> Path:
|
300
|
-
return Path(
|
590
|
+
return Path(
|
591
|
+
f"{self.workdir}/{self.sample_name}/find_latent_representations/{self.sample_name}_add_latent.h5ad"
|
592
|
+
)
|
301
593
|
|
302
594
|
@property
|
303
595
|
@ensure_path_exists
|
304
596
|
def mkscore_feather_path(self) -> Path:
|
305
|
-
return Path(
|
597
|
+
return Path(
|
598
|
+
f"{self.workdir}/{self.sample_name}/latent_to_gene/{self.sample_name}_gene_marker_score.feather"
|
599
|
+
)
|
306
600
|
|
307
601
|
@property
|
308
602
|
@ensure_path_exists
|
309
603
|
def ldscore_save_dir(self) -> Path:
|
310
|
-
return Path(f
|
604
|
+
return Path(f"{self.workdir}/{self.sample_name}/generate_ldscore")
|
311
605
|
|
312
606
|
@property
|
313
607
|
@ensure_path_exists
|
314
608
|
def ldsc_save_dir(self) -> Path:
|
315
|
-
return Path(f
|
609
|
+
return Path(f"{self.workdir}/{self.sample_name}/spatial_ldsc")
|
316
610
|
|
317
611
|
@property
|
318
612
|
@ensure_path_exists
|
319
613
|
def cauchy_save_dir(self) -> Path:
|
320
|
-
return Path(f
|
614
|
+
return Path(f"{self.workdir}/{self.sample_name}/cauchy_combination")
|
321
615
|
|
322
616
|
@ensure_path_exists
|
323
617
|
def get_report_dir(self, trait_name: str) -> Path:
|
324
|
-
return Path(f
|
618
|
+
return Path(f"{self.workdir}/{self.sample_name}/report/{trait_name}")
|
325
619
|
|
326
620
|
def get_gsMap_report_file(self, trait_name: str) -> Path:
|
327
|
-
return
|
621
|
+
return (
|
622
|
+
self.get_report_dir(trait_name) / f"{self.sample_name}_{trait_name}_gsMap_Report.html"
|
623
|
+
)
|
328
624
|
|
329
625
|
@ensure_path_exists
|
330
626
|
def get_manhattan_html_plot_path(self, trait_name: str) -> Path:
|
331
627
|
return Path(
|
332
|
-
f
|
628
|
+
f"{self.workdir}/{self.sample_name}/report/{trait_name}/manhattan_plot/{self.sample_name}_{trait_name}_Diagnostic_Manhattan_Plot.html"
|
629
|
+
)
|
333
630
|
|
334
631
|
@ensure_path_exists
|
335
632
|
def get_GSS_plot_dir(self, trait_name: str) -> Path:
|
336
|
-
return Path(f
|
633
|
+
return Path(f"{self.workdir}/{self.sample_name}/report/{trait_name}/GSS_plot")
|
337
634
|
|
338
635
|
def get_GSS_plot_select_gene_file(self, trait_name: str) -> Path:
|
339
|
-
return self.get_GSS_plot_dir(trait_name) /
|
636
|
+
return self.get_GSS_plot_dir(trait_name) / "plot_genes.csv"
|
340
637
|
|
341
638
|
@ensure_path_exists
|
342
639
|
def get_ldsc_result_file(self, trait_name: str) -> Path:
|
343
|
-
return Path(f
|
640
|
+
return Path(f"{self.ldsc_save_dir}/{self.sample_name}_{trait_name}.csv.gz")
|
344
641
|
|
345
642
|
@ensure_path_exists
|
346
643
|
def get_cauchy_result_file(self, trait_name: str) -> Path:
|
347
|
-
return Path(f
|
644
|
+
return Path(f"{self.cauchy_save_dir}/{self.sample_name}_{trait_name}.Cauchy.csv.gz")
|
348
645
|
|
349
646
|
@ensure_path_exists
|
350
647
|
def get_gene_diagnostic_info_save_path(self, trait_name: str) -> Path:
|
351
648
|
return Path(
|
352
|
-
f
|
649
|
+
f"{self.workdir}/{self.sample_name}/report/{trait_name}/{self.sample_name}_{trait_name}_Gene_Diagnostic_Info.csv"
|
650
|
+
)
|
353
651
|
|
354
652
|
@ensure_path_exists
|
355
653
|
def get_gsMap_plot_save_dir(self, trait_name: str) -> Path:
|
356
|
-
return Path(f
|
654
|
+
return Path(f"{self.workdir}/{self.sample_name}/report/{trait_name}/gsMap_plot")
|
357
655
|
|
358
656
|
def get_gsMap_html_plot_save_path(self, trait_name: str) -> Path:
|
359
|
-
return
|
657
|
+
return (
|
658
|
+
self.get_gsMap_plot_save_dir(trait_name)
|
659
|
+
/ f"{self.sample_name}_{trait_name}_gsMap_plot.html"
|
660
|
+
)
|
661
|
+
|
662
|
+
|
663
|
+
@dataclass
|
664
|
+
class CreateSliceMeanConfig:
|
665
|
+
slice_mean_output_file: str | Path
|
666
|
+
h5ad_yaml: str | dict | None = None
|
667
|
+
sample_name_list: list | None = None
|
668
|
+
h5ad_list: list | None = None
|
669
|
+
homolog_file: str | None = None
|
670
|
+
species: str | None = None
|
671
|
+
data_layer: str = None
|
672
|
+
|
673
|
+
def __post_init__(self):
|
674
|
+
if self.h5ad_list is None and self.h5ad_yaml is None:
|
675
|
+
raise ValueError("At least one of --h5ad_list or --h5ad_yaml must be provided.")
|
676
|
+
if self.h5ad_yaml is not None:
|
677
|
+
if isinstance(self.h5ad_yaml, str):
|
678
|
+
logger.info(f"Reading h5ad yaml file: {self.h5ad_yaml}")
|
679
|
+
h5ad_dict = (
|
680
|
+
yaml.safe_load(open(self.h5ad_yaml))
|
681
|
+
if isinstance(self.h5ad_yaml, str)
|
682
|
+
else self.h5ad_yaml
|
683
|
+
)
|
684
|
+
elif self.sample_name_list and self.h5ad_list:
|
685
|
+
logger.info("Reading sample name list and h5ad list")
|
686
|
+
h5ad_dict = dict(zip(self.sample_name_list, self.h5ad_list, strict=False))
|
687
|
+
else:
|
688
|
+
raise ValueError(
|
689
|
+
"Please provide either h5ad_yaml or both sample_name_list and h5ad_list."
|
690
|
+
)
|
691
|
+
|
692
|
+
# check if sample names is unique
|
693
|
+
assert len(h5ad_dict) == len(set(h5ad_dict)), "Sample names must be unique."
|
694
|
+
assert len(h5ad_dict) > 1, "At least two samples are required."
|
695
|
+
|
696
|
+
logger.info(f"Input h5ad files: {h5ad_dict}")
|
697
|
+
|
698
|
+
# Check if all files exist
|
699
|
+
self.h5ad_dict = {}
|
700
|
+
for sample_name, h5ad_file in h5ad_dict.items():
|
701
|
+
h5ad_file = Path(h5ad_file)
|
702
|
+
if not h5ad_file.exists():
|
703
|
+
raise FileNotFoundError(f"{h5ad_file} does not exist.")
|
704
|
+
self.h5ad_dict[sample_name] = h5ad_file
|
705
|
+
|
706
|
+
self.slice_mean_output_file = Path(self.slice_mean_output_file)
|
707
|
+
self.slice_mean_output_file.parent.mkdir(parents=True, exist_ok=True)
|
708
|
+
|
709
|
+
verify_homolog_file_format(self)
|
710
|
+
|
360
711
|
|
361
712
|
@dataclass
|
362
713
|
class FindLatentRepresentationsConfig(ConfigWithAutoPaths):
|
@@ -389,24 +740,27 @@ class FindLatentRepresentationsConfig(ConfigWithAutoPaths):
|
|
389
740
|
# self.output_hdf5_path = self.hdf5_with_latent_path
|
390
741
|
if self.hierarchically:
|
391
742
|
if self.annotation is None:
|
392
|
-
raise ValueError(
|
743
|
+
raise ValueError("annotation must be provided if hierarchically is True.")
|
393
744
|
logger.info(
|
394
|
-
|
745
|
+
"------Hierarchical mode is enabled. This will find the latent representations within each annotation."
|
746
|
+
)
|
395
747
|
|
396
748
|
# remind for not providing annotation
|
397
749
|
if self.annotation is None:
|
398
750
|
logger.warning(
|
399
|
-
|
751
|
+
"annotation is not provided. This will find the latent representations for the whole dataset."
|
752
|
+
)
|
400
753
|
else:
|
401
|
-
logger.info(f
|
754
|
+
logger.info(f"------Find latent representations for {self.annotation}...")
|
402
755
|
|
403
756
|
|
404
757
|
@dataclass
|
405
758
|
class LatentToGeneConfig(ConfigWithAutoPaths):
|
406
759
|
# input_hdf5_with_latent_path: str
|
407
760
|
# output_feather_path: str
|
761
|
+
input_hdf5_path: str | Path = None
|
408
762
|
no_expression_fraction: bool = False
|
409
|
-
latent_representation: str =
|
763
|
+
latent_representation: str = None
|
410
764
|
num_neighbour: int = 21
|
411
765
|
num_neighbour_spatial: int = 101
|
412
766
|
homolog_file: str = None
|
@@ -415,31 +769,61 @@ class LatentToGeneConfig(ConfigWithAutoPaths):
|
|
415
769
|
species: str = None
|
416
770
|
|
417
771
|
def __post_init__(self):
    """Resolve the input HDF5 file, pick the latent key and validate optional inputs.

    When ``input_hdf5_path`` is not given, ``self.hdf5_with_latent_path`` is
    used and must already exist. When it is given, the file must exist and is
    copied to ``self.hdf5_with_latent_path``. ``latent_representation``
    defaults to ``"latent_GVAE"``, ``gM_slices`` (if set) must exist, and the
    homolog file is checked last.

    Raises:
        AssertionError: if a required input file does not exist.
    """
    if self.input_hdf5_path is None:
        self.input_hdf5_path = self.hdf5_with_latent_path
        assert self.input_hdf5_path.exists(), (
            f"{self.input_hdf5_path} does not exist. Please run FindLatentRepresentations first."
        )
    else:
        assert Path(self.input_hdf5_path).exists(), f"{self.input_hdf5_path} does not exist."
        # copy to self.hdf5_with_latent_path
        import shutil

        source_path = Path(self.input_hdf5_path)
        target_path = Path(self.hdf5_with_latent_path)
        # shutil.copy2 raises SameFileError when source and destination are the
        # same file, e.g. when the user points at the file already in the
        # workdir. Nothing needs copying in that case.
        already_in_place = target_path.exists() and source_path.samefile(target_path)
        if not already_in_place:
            shutil.copy2(source_path, target_path)

    if self.latent_representation is not None:
        logger.info(f"Using the provided latent representation: {self.latent_representation}")
    else:
        self.latent_representation = "latent_GVAE"
        logger.info(f"Using default latent representation: {self.latent_representation}")

    if self.gM_slices is not None:
        assert Path(self.gM_slices).exists(), f"{self.gM_slices} does not exist."
        logger.info(f"Using the provided slice mean file: {self.gM_slices}.")

    verify_homolog_file_format(self)
|
795
|
+
|
796
|
+
|
797
|
+
def verify_homolog_file_format(config):
    """Check the first line of ``config.homolog_file`` and record the species.

    With no homolog file the function only logs that it runs in human mode.
    Otherwise the first line of the file must split (on whitespace) into
    exactly two names; the first one is stored in ``config.species``.

    Raises:
        ValueError: if the first line does not contain exactly two columns.
    """
    if config.homolog_file is None:
        logger.info("No homolog file provided. Run in human mode.")
        return

    logger.info(
        f"User provided homolog file to map gene names to human: {config.homolog_file}"
    )

    # Only the first line is needed to check the format.
    with open(config.homolog_file) as handle:
        header_fields = handle.readline().strip().split()

    _n_col = len(header_fields)
    if _n_col != 2:
        raise ValueError(
            f"Invalid homolog file format. Expected 2 columns, first column should be other species gene name, second column should be human gene name. "
            f"Got {_n_col} columns in the first line."
        )

    first_col_name, second_col_name = header_fields
    config.species = first_col_name
    logger.info(
        f"Homolog file provided and will map gene name from column1:{first_col_name} to column2:{second_col_name}"
    )
|
435
819
|
|
436
820
|
|
437
821
|
@dataclass
|
438
822
|
class GenerateLDScoreConfig(ConfigWithAutoPaths):
|
439
|
-
chrom:
|
823
|
+
chrom: int | str
|
440
824
|
|
441
825
|
bfile_root: str
|
442
|
-
keep_snp_root:
|
826
|
+
keep_snp_root: str | None
|
443
827
|
|
444
828
|
# annotation by gene distance
|
445
829
|
gtf_annotation_file: str
|
@@ -447,74 +831,106 @@ class GenerateLDScoreConfig(ConfigWithAutoPaths):
|
|
447
831
|
|
448
832
|
# annotation by enhancer
|
449
833
|
enhancer_annotation_file: str = None
|
450
|
-
snp_multiple_enhancer_strategy: Literal[
|
451
|
-
gene_window_enhancer_priority:
|
834
|
+
snp_multiple_enhancer_strategy: Literal["max_mkscore", "nearest_TSS"] = "max_mkscore"
|
835
|
+
gene_window_enhancer_priority: (
|
836
|
+
Literal["gene_window_first", "enhancer_first", "enhancer_only"] | None
|
837
|
+
) = None
|
452
838
|
|
453
839
|
# for calculating ld score
|
454
840
|
additional_baseline_annotation: str = None
|
455
841
|
spots_per_chunk: int = 1_000
|
456
842
|
ld_wind: int = 1
|
457
|
-
ld_unit: str =
|
843
|
+
ld_unit: str = "CM"
|
458
844
|
|
459
845
|
# zarr config
|
460
|
-
ldscore_save_format: Literal[
|
846
|
+
ldscore_save_format: Literal["feather", "zarr", "quick_mode"] = "feather"
|
461
847
|
|
462
|
-
zarr_chunk_size:
|
848
|
+
zarr_chunk_size: tuple[int, int] = None
|
463
849
|
|
464
850
|
# for pre calculating the SNP Gene ldscore Weight
|
465
851
|
save_pre_calculate_snp_gene_weight_matrix: bool = False
|
466
852
|
|
467
|
-
baseline_annotation_dir:
|
468
|
-
SNP_gene_pair_dir:
|
853
|
+
baseline_annotation_dir: str | None = None
|
854
|
+
SNP_gene_pair_dir: str | None = None
|
855
|
+
|
469
856
|
def __post_init__(self):
    """Reconcile the enhancer options, check baseline files and default the zarr chunks.

    * An enhancer file without a priority switches the priority to
      ``"enhancer_only"``; a priority without an enhancer file is reset to
      ``None``. Both cases log a warning.
    * When ``additional_baseline_annotation`` is set, the matching
      ``baseline.<chrom>.annot.gz`` file must exist for every chromosome
      1-22 (``chrom == "all"``) or for the single requested chromosome.
    * For the ``"zarr"`` save format a missing ``zarr_chunk_size`` becomes
      ``(10_000, spots_per_chunk)``.

    Raises:
        AssertionError: if the priority is not one of the accepted values.
        FileNotFoundError: if a required baseline annotation file is missing.
    """
    enhancer_given = self.enhancer_annotation_file is not None
    priority_given = self.gene_window_enhancer_priority is not None

    # The two adjustments are mutually exclusive, so an elif is sufficient.
    if enhancer_given and not priority_given:
        logger.warning(
            "enhancer_annotation_file is provided but gene_window_enhancer_priority is not provided. "
            "by default, gene_window_enhancer_priority is set to 'enhancer_only', when enhancer_annotation_file is provided."
        )
        self.gene_window_enhancer_priority = "enhancer_only"
    elif priority_given and not enhancer_given:
        logger.warning(
            "gene_window_enhancer_priority is provided but enhancer_annotation_file is not provided. "
            "by default, gene_window_enhancer_priority is set to None, when enhancer_annotation_file is not provided."
        )
        self.gene_window_enhancer_priority = None

    accepted_priorities = (None, "gene_window_first", "enhancer_first", "enhancer_only")
    assert self.gene_window_enhancer_priority in accepted_priorities, (
        f"gene_window_enhancer_priority must be one of None, 'gene_window_first', 'enhancer_first', 'enhancer_only', but got {self.gene_window_enhancer_priority}."
    )

    # Tell the user which annotation sources will feed the LD score.
    if self.gene_window_enhancer_priority == "enhancer_only":
        logger.info("Only enhancer annotation will be used to calculate LD score. ")
    elif self.gene_window_enhancer_priority in ("gene_window_first", "enhancer_first"):
        logger.info(
            "Both gene_window and enhancer annotation will be used to calculate LD score. "
        )
        logger.info(
            f"SNP within +-{self.gene_window_size} bp of gene body will be used and enhancer annotation will be used to calculate LD score. If a snp maps to multiple enhancers, the strategy to choose by your select strategy: {self.snp_multiple_enhancer_strategy}."
        )
    else:
        logger.info(
            f"Only gene window annotation will be used to calculate LD score. SNP within +-{self.gene_window_size} bp of gene body will be used. "
        )

    # Baseline annotation: report the choice and verify the files when given.
    if self.additional_baseline_annotation is None:
        logger.info(
            "------Baseline annotation is not provided. Default baseline annotation will be used."
        )
    else:
        logger.info(
            "------Baseline annotation is provided. Additional baseline annotation will be used with the default baseline annotation."
        )
        logger.info(
            f"------Baseline annotation directory: {self.additional_baseline_annotation}"
        )
        if self.chrom == "all":
            chromosomes = [str(number) for number in range(1, 23)]
        else:
            chromosomes = [self.chrom]
        for chrom in chromosomes:
            baseline_annotation_path = (
                Path(self.additional_baseline_annotation) / f"baseline.{chrom}.annot.gz"
            )
            if not baseline_annotation_path.exists():
                raise FileNotFoundError(
                    f"baseline.{chrom}.annot.gz is not found in {self.additional_baseline_annotation}."
                )

    # Default zarr chunking is only relevant for the zarr save format.
    if self.zarr_chunk_size is None and self.ldscore_save_format == "zarr":
        self.zarr_chunk_size = (10_000, self.spots_per_chunk)
|
519
935
|
|
520
936
|
|
@@ -523,56 +939,61 @@ class SpatialLDSCConfig(ConfigWithAutoPaths):
|
|
523
939
|
w_file: str
|
524
940
|
# ldscore_save_dir: str
|
525
941
|
use_additional_baseline_annotation: bool = True
|
526
|
-
trait_name:
|
527
|
-
sumstats_file:
|
528
|
-
sumstats_config_file:
|
942
|
+
trait_name: str | None = None
|
943
|
+
sumstats_file: str | None = None
|
944
|
+
sumstats_config_file: str | None = None
|
529
945
|
num_processes: int = 4
|
530
946
|
not_M_5_50: bool = False
|
531
947
|
n_blocks: int = 200
|
532
|
-
chisq_max:
|
533
|
-
all_chunk:
|
534
|
-
chunk_range:
|
948
|
+
chisq_max: int | None = None
|
949
|
+
all_chunk: int | None = None
|
950
|
+
chunk_range: tuple[int, int] | None = None
|
535
951
|
|
536
|
-
ldscore_save_format: Literal[
|
952
|
+
ldscore_save_format: Literal["feather", "zarr", "quick_mode"] = "feather"
|
537
953
|
|
538
954
|
spots_per_chunk_quick_mode: int = 1_000
|
539
|
-
snp_gene_weight_adata_path:
|
955
|
+
snp_gene_weight_adata_path: str | None = None
|
540
956
|
|
541
957
|
def __post_init__(self):
    """Validate the GWAS inputs and build ``self.sumstats_config_dict``.

    Exactly one of ``sumstats_file`` (with ``trait_name``) or
    ``sumstats_config_file`` (without ``trait_name``) must be given. The
    resulting trait-name -> sumstats-file mapping is stored in
    ``self.sumstats_config_dict``; every listed file must exist. If
    ``use_additional_baseline_annotation`` is true,
    ``process_additional_baseline_annotation`` is called at the end.

    Raises:
        ValueError: if the combination of sumstats options is invalid or the
            config file does not contain a mapping.
        AssertionError: if a sumstats file does not exist.
    """
    super().__post_init__()
    if self.sumstats_file is None and self.sumstats_config_file is None:
        raise ValueError("One of sumstats_file and sumstats_config_file must be provided.")
    if self.sumstats_file is not None and self.sumstats_config_file is not None:
        raise ValueError(
            "Only one of sumstats_file and sumstats_config_file must be provided."
        )
    if self.sumstats_file is not None and self.trait_name is None:
        raise ValueError("trait_name must be provided if sumstats_file is provided.")
    if self.sumstats_config_file is not None and self.trait_name is not None:
        raise ValueError(
            "trait_name must not be provided if sumstats_config_file is provided."
        )

    self.sumstats_config_dict = {}
    if self.sumstats_config_file is not None:
        # load the sumstats config file
        import yaml

        # NOTE(review): yaml.load with FullLoader is kept as-is; consider
        # yaml.safe_load if config files can come from untrusted sources.
        with open(self.sumstats_config_file) as f:
            config = yaml.load(f, Loader=yaml.FullLoader)
        if not isinstance(config, dict):
            raise ValueError(
                f"{self.sumstats_config_file} must contain a mapping of trait names to sumstats files."
            )
        for trait_name, sumstats_file in config.items():
            assert Path(sumstats_file).exists(), f"{sumstats_file} does not exist."
            # Record the trait; previously the loop only validated the path
            # and left the mapping empty for config-file input.
            self.sumstats_config_dict[trait_name] = sumstats_file
    else:
        # The checks above guarantee sumstats_file and trait_name are set here.
        self.sumstats_config_dict[self.trait_name] = self.sumstats_file

    for sumstats_file in self.sumstats_config_dict.values():
        assert Path(sumstats_file).exists(), f"{sumstats_file} does not exist."

    # check if additional baseline annotation exists
    if self.use_additional_baseline_annotation:
        self.process_additional_baseline_annotation()
|
573
994
|
|
574
995
|
def process_additional_baseline_annotation(self):
|
575
|
-
additional_baseline_annotation = Path(self.ldscore_save_dir) /
|
996
|
+
additional_baseline_annotation = Path(self.ldscore_save_dir) / "additional_baseline"
|
576
997
|
dir_exists = additional_baseline_annotation.exists()
|
577
998
|
|
578
999
|
if not dir_exists:
|
@@ -580,7 +1001,7 @@ class SpatialLDSCConfig(ConfigWithAutoPaths):
|
|
580
1001
|
# if self.use_additional_baseline_annotation:
|
581
1002
|
# logger.warning(f"additional_baseline directory is not found in {self.ldscore_save_dir}.")
|
582
1003
|
# print('''\
|
583
|
-
# if you want to use additional baseline annotation,
|
1004
|
+
# if you want to use additional baseline annotation,
|
584
1005
|
# please provide additional baseline annotation when calculating ld score.
|
585
1006
|
# ''')
|
586
1007
|
# raise FileNotFoundError(
|
@@ -589,15 +1010,21 @@ class SpatialLDSCConfig(ConfigWithAutoPaths):
|
|
589
1010
|
# self.use_additional_baseline_annotation = self.use_additional_baseline_annotation or True
|
590
1011
|
else:
|
591
1012
|
logger.info(
|
592
|
-
|
593
|
-
|
1013
|
+
"------Additional baseline annotation is provided. It will be used with the default baseline annotation."
|
1014
|
+
)
|
1015
|
+
logger.info(
|
1016
|
+
f"------Additional baseline annotation directory: {additional_baseline_annotation}"
|
1017
|
+
)
|
594
1018
|
|
595
1019
|
chrom_list = range(1, 23)
|
596
1020
|
for chrom in chrom_list:
|
597
|
-
baseline_annotation_path =
|
1021
|
+
baseline_annotation_path = (
|
1022
|
+
additional_baseline_annotation / f"baseline.{chrom}.l2.ldscore.feather"
|
1023
|
+
)
|
598
1024
|
if not baseline_annotation_path.exists():
|
599
1025
|
raise FileNotFoundError(
|
600
|
-
f
|
1026
|
+
f"baseline.{chrom}.annot.gz is not found in {additional_baseline_annotation}."
|
1027
|
+
)
|
601
1028
|
return None
|
602
1029
|
|
603
1030
|
|
@@ -605,8 +1032,25 @@ class SpatialLDSCConfig(ConfigWithAutoPaths):
|
|
605
1032
|
class CauchyCombinationConfig(ConfigWithAutoPaths):
    # Options for the Cauchy combination step.
    # Either a single ``sample_name`` (read in __post_init__; not declared in
    # this class) or an explicit ``sample_name_list`` selects the samples.
    trait_name: str
    annotation: str
    sample_name_list: list[str] = dataclasses.field(default_factory=list)
    output_file: str | Path | None = None

    def __post_init__(self):
        """Normalise the sample selection and the output location.

        With ``sample_name`` set, ``sample_name_list`` must be empty; it is
        replaced by ``[sample_name]`` and a missing ``output_file`` is filled
        from ``get_cauchy_result_file(trait_name)``. Without ``sample_name``,
        both a non-empty ``sample_name_list`` and an ``output_file`` are
        required.

        Raises:
            ValueError: if both ``sample_name`` and ``sample_name_list`` are given.
            AssertionError: if the list mode lacks sample names or an output file.
        """
        # List mode: everything must be supplied explicitly.
        if self.sample_name is None:
            assert len(self.sample_name_list) > 0, "At least one sample name must be provided."
            assert self.output_file is not None, (
                "Output_file must be provided if sample_name_list is provided."
            )
            return

        # Single-sample mode.
        if len(self.sample_name_list) > 0:
            raise ValueError("Only one of sample_name and sample_name_list must be provided.")

        self.sample_name_list = [self.sample_name]
        if self.output_file is None:
            self.output_file = self.get_cauchy_result_file(self.trait_name)
|
610
1054
|
|
611
1055
|
|
612
1056
|
@dataclass
|
@@ -618,7 +1062,7 @@ class VisualizeConfig(ConfigWithAutoPaths):
|
|
618
1062
|
fig_height: int = 600
|
619
1063
|
fig_width: int = 800
|
620
1064
|
point_size: int = None
|
621
|
-
fig_style: Literal[
|
1065
|
+
fig_style: Literal["dark", "light"] = "light"
|
622
1066
|
|
623
1067
|
|
624
1068
|
@dataclass
|
@@ -628,22 +1072,26 @@ class DiagnosisConfig(ConfigWithAutoPaths):
|
|
628
1072
|
|
629
1073
|
trait_name: str
|
630
1074
|
sumstats_file: str
|
631
|
-
plot_type: Literal[
|
1075
|
+
plot_type: Literal["manhattan", "GSS", "gsMap", "all"] = "all"
|
632
1076
|
top_corr_genes: int = 50
|
633
|
-
selected_genes:
|
1077
|
+
selected_genes: list[str] | None = None
|
634
1078
|
|
635
|
-
fig_width:
|
636
|
-
fig_height:
|
637
|
-
point_size:
|
638
|
-
fig_style: Literal[
|
1079
|
+
fig_width: int | None = None
|
1080
|
+
fig_height: int | None = None
|
1081
|
+
point_size: int | None = None
|
1082
|
+
fig_style: Literal["dark", "light"] = "light"
|
639
1083
|
|
640
1084
|
def __post_init__(self):
    """Set ``self.customize_fig`` from the figure-size options.

    If any of ``fig_width``, ``fig_height`` and ``point_size`` is truthy,
    all three must be; ``customize_fig`` is then ``True``. Otherwise it is
    ``False``.

    Raises:
        AssertionError: if only some of the three options are provided.
    """
    figure_options = [self.fig_width, self.fig_height, self.point_size]

    if not any(figure_options):
        self.customize_fig = False
        return

    logger.info("Customizing the figure size and point size.")
    assert all(figure_options), (
        "All of fig_width, fig_height, and point_size must be provided."
    )
    self.customize_fig = True
|
1093
|
+
|
1094
|
+
|
647
1095
|
@dataclass
class ReportConfig(DiagnosisConfig):
    # The report step takes exactly the same options as the diagnosis step,
    # so no fields or methods are added here.
    ...
|
@@ -656,57 +1104,78 @@ class RunAllModeConfig(ConfigWithAutoPaths):
|
|
656
1104
|
# == ST DATA PARAMETERS ==
|
657
1105
|
hdf5_path: str
|
658
1106
|
annotation: str
|
659
|
-
data_layer: str =
|
1107
|
+
data_layer: str = "X"
|
1108
|
+
|
1109
|
+
# == latent 2 Gene PARAMETERS ==
|
1110
|
+
gM_slices: str | None = None
|
1111
|
+
latent_representation: str = None
|
1112
|
+
num_neighbour: int = 21
|
1113
|
+
num_neighbour_spatial: int = 101
|
660
1114
|
|
661
1115
|
# ==GWAS DATA PARAMETERS==
|
662
|
-
trait_name:
|
663
|
-
sumstats_file:
|
664
|
-
sumstats_config_file:
|
1116
|
+
trait_name: str | None = None
|
1117
|
+
sumstats_file: str | None = None
|
1118
|
+
sumstats_config_file: str | None = None
|
665
1119
|
|
666
1120
|
# === homolog PARAMETERS ===
|
667
|
-
homolog_file:
|
1121
|
+
homolog_file: str | None = None
|
668
1122
|
|
669
1123
|
max_processes: int = 10
|
670
1124
|
|
671
1125
|
def __post_init__(self):
    """Derive the resource paths and validate the ST and GWAS inputs.

    All resource locations are built from ``self.gsMap_resource_dir``. The
    ``hdf5_path`` and the GTF file must exist. Exactly one of
    ``sumstats_file`` (with ``trait_name``) or ``sumstats_config_file``
    (without ``trait_name``) must be given; the resulting
    trait-name -> sumstats-file mapping is stored in
    ``self.sumstats_config_dict`` and every listed file must exist.

    Raises:
        FileNotFoundError: if ``hdf5_path`` or the GTF file is missing.
        ValueError: if the combination of sumstats options is invalid.
        AssertionError: if a sumstats file does not exist.
    """
    super().__post_init__()

    # Resource files shipped under the gsMap resource directory.
    self.gtffile = (
        f"{self.gsMap_resource_dir}/genome_annotation/gtf/gencode.v39lift37.annotation.gtf"
    )
    self.bfile_root = (
        f"{self.gsMap_resource_dir}/LD_Reference_Panel/1000G_EUR_Phase3_plink/1000G.EUR.QC"
    )
    self.keep_snp_root = f"{self.gsMap_resource_dir}/LDSC_resource/hapmap3_snps/hm"
    self.w_file = f"{self.gsMap_resource_dir}/LDSC_resource/weights_hm3_no_hla/weights."
    self.snp_gene_weight_adata_path = (
        f"{self.gsMap_resource_dir}/quick_mode/snp_gene_weight_matrix.h5ad"
    )
    quick_mode_baseline = Path(f"{self.gsMap_resource_dir}/quick_mode/baseline")
    self.baseline_annotation_dir = quick_mode_baseline.resolve()
    quick_mode_pairs = Path(f"{self.gsMap_resource_dir}/quick_mode/SNP_gene_pair")
    self.SNP_gene_pair_dir = quick_mode_pairs.resolve()

    # The input data and the GTF annotation must be present up front.
    for file in (self.hdf5_path, self.gtffile):
        if Path(file).exists():
            continue
        raise FileNotFoundError(f"File {file} does not exist.")

    sumstats_given = self.sumstats_file is not None
    config_given = self.sumstats_config_file is not None
    trait_given = self.trait_name is not None

    if not (sumstats_given or config_given):
        raise ValueError("One of sumstats_file and sumstats_config_file must be provided.")
    if sumstats_given and config_given:
        raise ValueError(
            "Only one of sumstats_file and sumstats_config_file must be provided."
        )
    if sumstats_given and not trait_given:
        raise ValueError("trait_name must be provided if sumstats_file is provided.")
    if config_given and trait_given:
        raise ValueError(
            "trait_name must not be provided if sumstats_config_file is provided."
        )

    self.sumstats_config_dict = {}
    if config_given:
        # Several traits listed in a YAML file.
        import yaml

        with open(self.sumstats_config_file) as handle:
            trait_to_sumstats = yaml.load(handle, Loader=yaml.FullLoader)
        for trait_name, sumstats_file in trait_to_sumstats.items():
            assert Path(sumstats_file).exists(), f"{sumstats_file} does not exist."
            self.sumstats_config_dict[trait_name] = sumstats_file
    elif sumstats_given and trait_given:
        # A single trait given directly.
        self.sumstats_config_dict[self.trait_name] = self.sumstats_file
    else:
        raise ValueError("One of sumstats_file and sumstats_config_file must be provided.")

    for sumstats_file in self.sumstats_config_dict.values():
        assert Path(sumstats_file).exists(), f"{sumstats_file} does not exist."
|
710
1179
|
|
711
1180
|
|
712
1181
|
@dataclass
|
@@ -729,78 +1198,115 @@ class FormatSumstatsConfig:
|
|
729
1198
|
format: str = None
|
730
1199
|
chr: str = None
|
731
1200
|
pos: str = None
|
732
|
-
chunksize: int =
|
1201
|
+
chunksize: int = 1e7
|
733
1202
|
info_min: float = 0.9
|
734
1203
|
maf_min: float = 0.01
|
735
1204
|
keep_chr_pos: bool = False
|
736
1205
|
|
737
1206
|
|
738
|
-
@register_cli(
|
739
|
-
|
740
|
-
|
1207
|
+
@register_cli(
    name="run_find_latent_representations",
    description="Run Find_latent_representations \nFind the latent representations of each spot by running GNN-VAE",
    add_args_function=add_find_latent_representations_args,
)
def run_find_latent_representation_from_cli(args: argparse.Namespace):
    """Build a FindLatentRepresentationsConfig from ``args`` and run the step."""
    # Imported inside the function, as in the other CLI wrappers in this file.
    from gsMap.find_latent_representation import run_find_latent_representation

    run_find_latent_representation(
        get_dataclass_from_parser(args, FindLatentRepresentationsConfig)
    )
|
745
1217
|
|
746
1218
|
|
747
|
-
@register_cli(
|
748
|
-
|
749
|
-
|
1219
|
+
@register_cli(
    name="run_latent_to_gene",
    description="Run Latent_to_gene \nEstimate gene marker gene scores for each spot by using latent representations from nearby spots",
    add_args_function=add_latent_to_gene_args,
)
def run_latent_to_gene_from_cli(args: argparse.Namespace):
    """Build a LatentToGeneConfig from ``args`` and run the step."""
    # Imported inside the function, as in the other CLI wrappers in this file.
    from gsMap.latent_to_gene import run_latent_to_gene

    run_latent_to_gene(get_dataclass_from_parser(args, LatentToGeneConfig))
|
754
1229
|
|
755
1230
|
|
756
|
-
@register_cli(
|
757
|
-
|
758
|
-
|
1231
|
+
@register_cli(
    name="run_generate_ldscore",
    description="Run Generate_ldscore \nGenerate LD scores for each spot",
    add_args_function=add_generate_ldscore_args,
)
def run_generate_ldscore_from_cli(args: argparse.Namespace):
    """Build a GenerateLDScoreConfig from ``args`` and run the step."""
    # Imported inside the function, as in the other CLI wrappers in this file.
    from gsMap.generate_ldscore import run_generate_ldscore

    run_generate_ldscore(get_dataclass_from_parser(args, GenerateLDScoreConfig))
|
763
1241
|
|
764
1242
|
|
765
|
-
@register_cli(
|
766
|
-
|
767
|
-
|
1243
|
+
@register_cli(
    name="run_spatial_ldsc",
    description="Run Spatial_ldsc \nRun spatial LDSC for each spot",
    add_args_function=add_spatial_ldsc_args,
)
def run_spatial_ldsc_from_cli(args: argparse.Namespace):
    """Build a SpatialLDSCConfig from ``args`` and run the step."""
    # Imported inside the function, as in the other CLI wrappers in this file.
    from gsMap.spatial_ldsc_multiple_sumstats import run_spatial_ldsc

    run_spatial_ldsc(get_dataclass_from_parser(args, SpatialLDSCConfig))
|
772
1253
|
|
773
1254
|
|
774
|
-
@register_cli(
|
775
|
-
|
776
|
-
|
1255
|
+
@register_cli(
    name="run_cauchy_combination",
    description="Run Cauchy_combination for each annotation",
    add_args_function=add_Cauchy_combination_args,
)
def run_Cauchy_combination_from_cli(args: argparse.Namespace):
    """Build a CauchyCombinationConfig from ``args`` and run the step."""
    # Imported inside the function, as in the other CLI wrappers in this file.
    from gsMap.cauchy_combination_test import run_Cauchy_combination

    run_Cauchy_combination(get_dataclass_from_parser(args, CauchyCombinationConfig))
|
781
1265
|
|
782
1266
|
|
783
|
-
@register_cli(
|
784
|
-
|
785
|
-
|
1267
|
+
@register_cli(
    name="run_report",
    description="Run Report to generate diagnostic plots and tables",
    add_args_function=add_report_args,
)
def run_Report_from_cli(args: argparse.Namespace):
    """Build a ReportConfig from ``args`` and run the report step."""
    # Imported inside the function, as in the other CLI wrappers in this file.
    from gsMap.report import run_report

    run_report(get_dataclass_from_parser(args, ReportConfig))
|
790
1277
|
|
791
1278
|
|
792
|
-
@register_cli(
|
793
|
-
|
794
|
-
|
1279
|
+
@register_cli(
    name="format_sumstats",
    description="Format gwas summary statistics",
    add_args_function=add_format_sumstats_args,
)
def gwas_format_from_cli(args: argparse.Namespace):
    """Build a FormatSumstatsConfig from ``args`` and run the formatter."""
    # Imported inside the function, as in the other CLI wrappers in this file.
    from gsMap.format_sumstats import gwas_format

    gwas_format(get_dataclass_from_parser(args, FormatSumstatsConfig))
|
799
1289
|
|
800
|
-
|
801
|
-
|
802
|
-
|
1290
|
+
|
1291
|
+
@register_cli(
    name="quick_mode",
    description="Run all the gsMap pipeline in quick mode",
    add_args_function=add_run_all_mode_args,
)
def run_all_mode_from_cli(args: argparse.Namespace):
    """Build a RunAllModeConfig from ``args`` and run the pipeline."""
    # Imported inside the function, as in the other CLI wrappers in this file.
    from gsMap.run_all_mode import run_pipeline

    run_pipeline(get_dataclass_from_parser(args, RunAllModeConfig))
|
1301
|
+
|
1302
|
+
|
1303
|
+
@register_cli(
    name="create_slice_mean",
    description="Create slice mean from multiple h5ad files",
    add_args_function=add_create_slice_mean_args,
)
def create_slice_mean_from_cli(args: argparse.Namespace):
    """Build a CreateSliceMeanConfig from ``args`` and run the step."""
    # Imported inside the function, as in the other CLI wrappers in this file.
    from gsMap.create_slice_mean import run_create_slice_mean

    run_create_slice_mean(get_dataclass_from_parser(args, CreateSliceMeanConfig))
|