gsMap 1.71.2__py3-none-any.whl → 1.72.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gsMap/config.py CHANGED
@@ -1,75 +1,118 @@
1
- import sys
2
1
  import argparse
2
+ import dataclasses
3
3
  import logging
4
+ import sys
4
5
  from collections import OrderedDict, namedtuple
6
+ from collections.abc import Callable
5
7
  from dataclasses import dataclass
8
+ from functools import wraps
6
9
  from pathlib import Path
7
10
  from pprint import pprint
8
- from typing import Callable
9
- from typing import Union, Literal, Tuple, Optional, List
10
- from functools import wraps
11
+ from typing import Literal
12
+
11
13
  import pyfiglet
14
+ import yaml
12
15
 
13
16
  from gsMap.__init__ import __version__
14
17
 
15
18
# Registry of CLI subcommands, keyed by subcommand name. An OrderedDict is used so
# entries are kept in the order they were registered.
cli_function_registry = OrderedDict()

# Record stored in the registry for each subcommand.
subcommand = namedtuple(
    "subcommand",
    ("name", "func", "add_args_function", "description"),
)
18
21
 
19
22
 
20
23
def get_gsMap_logger(logger_name):
    """Return the logger called ``logger_name`` configured for gsMap output.

    The logger level is set to DEBUG and a ``StreamHandler`` using the gsMap
    message format is attached.

    ``logging.getLogger`` hands back the same object for the same name, so the
    handler is attached only once per logger; calling this function repeatedly
    does not duplicate log lines.

    Args:
        logger_name: Name passed to ``logging.getLogger``.

    Returns:
        The configured ``logging.Logger``.
    """
    logger = logging.getLogger(logger_name)
    logger.setLevel(logging.DEBUG)

    # Tag our handler with a name so a later call can recognise it and skip re-adding it.
    handler_name = f"gsMap:{logger_name}"
    already_attached = any(
        getattr(existing, "name", None) == handler_name for existing in logger.handlers
    )
    if not already_attached:
        handler = logging.StreamHandler()
        handler.set_name(handler_name)
        handler.setFormatter(
            logging.Formatter("[{asctime}] {levelname:.5s} | {name} - {message}", style="{")
        )
        logger.addHandler(handler)
    return logger
28
32
 
29
- logger = get_gsMap_logger('gsMap')
33
+
34
+ logger = get_gsMap_logger("gsMap")
35
+
30
36
 
31
37
  # Decorator to register functions for cli parsing
32
38
def register_cli(name: str, description: str, add_args_function: Callable) -> Callable:
    """Build a decorator that registers a function as a CLI subcommand.

    Args:
        name: Key under which the entry is stored in ``cli_function_registry``;
            also used in the start/finish log messages.
        description: Description stored on the registry entry.
        add_args_function: Callable stored on the registry entry as
            ``add_args_function``.

    Returns:
        A decorator. Applying it stores a ``subcommand`` entry whose ``func`` is a
        wrapper around the decorated function, and returns that wrapper. The
        wrapper prints the gsMap banner (figlet logo, version line, separator),
        logs start and finish, and returns whatever the wrapped function returns.
    """

    def decorator(func: Callable) -> Callable:
        @wraps(func)  # keep the wrapped function's __name__ / __doc__ on the wrapper
        def wrapper(*args, **kwargs):
            # NOTE: the previous body called name.replace("_", " ") and threw the
            # result away; log messages therefore always used the raw name, which
            # is kept here.
            gsMap_main_logo = pyfiglet.figlet_format(
                "gsMap",
                font="doom",
                width=80,
                justify="center",
            ).rstrip()
            print(gsMap_main_logo, flush=True)
            version_number = "Version: " + __version__
            print(version_number.center(80), flush=True)
            print("=" * 80, flush=True)
            logger.info(f"Running {name}...")
            result = func(*args, **kwargs)
            logger.info(f"Finished running {name}.")
            return result

        cli_function_registry[name] = subcommand(
            name=name, func=wrapper, add_args_function=add_args_function, description=description
        )
        return wrapper

    return decorator
50
62
 
63
+
51
64
def add_shared_args(parser):
    """Add the required ``--workdir`` and ``--sample_name`` string options to ``parser``."""
    shared_options = (
        ("--workdir", "Path to the working directory."),
        ("--sample_name", "Name of the sample."),
    )
    for flag, help_text in shared_options:
        parser.add_argument(flag, type=str, required=True, help=help_text)
69
+
54
70
 
55
71
def add_find_latent_representations_args(parser):
    """Add the command-line options for finding latent representations.

    Registers the shared options (through ``add_shared_args``), the required
    ``--input_hdf5_path``, ``--annotation`` and ``--data_layer`` options, and the
    optional training hyper-parameters with their defaults.

    Args:
        parser: The argparse parser (or sub-parser) to extend in place.
    """
    add_shared_args(parser)

    for flag, help_text in (
        ("--input_hdf5_path", "Path to the input HDF5 file."),
        ("--annotation", "Name of the annotation in adata.obs to use."),
    ):
        parser.add_argument(flag, required=True, type=str, help=help_text)

    # --data_layer is required, so a default could never take effect; the former
    # default="counts" was dead and has been removed. Parsing is unchanged.
    parser.add_argument(
        "--data_layer",
        type=str,
        required=True,
        help='Data layer for gene expression (e.g., "count", "counts", "log1p").',
    )

    numeric_options = (
        ("--epochs", int, 300, "Number of training epochs."),
        ("--feat_hidden1", int, 256, "Neurons in the first hidden layer."),
        ("--feat_hidden2", int, 128, "Neurons in the second hidden layer."),
        ("--gat_hidden1", int, 64, "Units in the first GAT hidden layer."),
        ("--gat_hidden2", int, 30, "Units in the second GAT hidden layer."),
        ("--p_drop", float, 0.1, "Dropout rate."),
        ("--gat_lr", float, 0.001, "Learning rate for the GAT."),
        ("--n_neighbors", int, 11, "Number of neighbors for GAT."),
        ("--n_comps", int, 300, "Number of principal components for PCA."),
    )
    for flag, value_type, default, help_text in numeric_options:
        parser.add_argument(flag, type=value_type, default=default, help=help_text)

    parser.add_argument(
        "--weighted_adj", action="store_true", help="Use weighted adjacency in GAT."
    )
    parser.add_argument(
        "--convergence_threshold", type=float, default=1e-4, help="Threshold for convergence."
    )
    parser.add_argument(
        "--hierarchically",
        action="store_true",
        help="Enable hierarchical latent representation finding.",
    )
73
116
 
74
117
 
75
118
  def chrom_choice(value):
@@ -77,10 +120,12 @@ def chrom_choice(value):
77
120
  ivalue = int(value)
78
121
  if 1 <= ivalue <= 22:
79
122
  return ivalue
80
- elif value.lower() == 'all':
123
+ elif value.lower() == "all":
81
124
  return value
82
125
  else:
83
- raise argparse.ArgumentTypeError(f"'{value}' is an invalid chromosome choice. Choose from 1-22 or 'all'.")
126
+ raise argparse.ArgumentTypeError(
127
+ f"'{value}' is an invalid chromosome choice. Choose from 1-22 or 'all'."
128
+ )
84
129
 
85
130
 
86
131
  def filter_args_for_dataclass(args_dict, data_class: dataclass):
@@ -89,7 +134,7 @@ def filter_args_for_dataclass(args_dict, data_class: dataclass):
89
134
 
90
135
def get_dataclass_from_parser(args: argparse.Namespace, data_class: dataclass):
    """Instantiate ``data_class`` from the parsed command-line namespace.

    The namespace is converted to a dict and passed through
    ``filter_args_for_dataclass``; the resulting keyword arguments are echoed to
    stdout and then used to construct ``data_class``.

    Args:
        args: Parsed argparse namespace.
        data_class: Class to instantiate.

    Returns:
        ``data_class(**kwargs)`` built from the filtered arguments.
    """
    selected_kwargs = filter_args_for_dataclass(vars(args), data_class)

    # Echo the effective configuration so it appears in the run output.
    header = f"Using the following arguments for {data_class.__name__}:"
    print(header, flush=True)
    pprint(selected_kwargs, indent=4)
    sys.stdout.flush()

    instance = data_class(**selected_kwargs)
    return instance
@@ -97,178 +142,423 @@ def get_dataclass_from_parser(args: argparse.Namespace, data_class: dataclass):
97
142
 
98
143
def add_latent_to_gene_args(parser):
    """Add the command-line options for the latent-to-gene step.

    Registers the shared options (through ``add_shared_args``) followed by the
    step's own options; all of the latter are optional.

    Args:
        parser: The argparse parser (or sub-parser) to extend in place.
    """
    add_shared_args(parser)

    def add_optional_str(flag, help_text):
        # Optional string option that defaults to None.
        parser.add_argument(flag, type=str, default=None, help=help_text)

    def add_int(flag, default, help_text):
        parser.add_argument(flag, type=int, default=default, help=help_text)

    add_optional_str(
        "--input_hdf5_path",
        "Path to the input HDF5 file with latent representations, "
        "if --latent_representation is specified.",
    )
    parser.add_argument(
        "--no_expression_fraction", action="store_true", help="Skip expression fraction filtering."
    )
    add_optional_str(
        "--latent_representation",
        "Type of latent representation. This should exist in the h5ad obsm.",
    )
    add_int("--num_neighbour", 21, "Number of neighbors.")
    add_int("--num_neighbour_spatial", 101, "Number of spatial neighbors.")
    add_optional_str("--homolog_file", "Path to homologous gene conversion file (optional).")
    add_optional_str("--gM_slices", "Path to the slice mean file (optional).")
    add_optional_str("--annotation", "Name of the annotation in adata.obs to use (optional).")
126
180
 
127
181
 
128
- def add_latent_to_gene_args(parser):
182
def add_generate_ldscore_args(parser):
    """Add the command-line options for LD score generation.

    Registers the shared options (through ``add_shared_args``), four required
    string options, and the optional window/enhancer/LD settings.

    Args:
        parser: The argparse parser (or sub-parser) to extend in place.
    """
    add_shared_args(parser)

    required_strings = (
        ("--chrom", 'Chromosome id (1-22) or "all".'),
        ("--bfile_root", "Root path for genotype plink bfiles (.bim, .bed, .fam)."),
        ("--keep_snp_root", "Root path for SNP files."),
        ("--gtf_annotation_file", "Path to GTF annotation file."),
    )
    for flag, help_text in required_strings:
        parser.add_argument(flag, type=str, required=True, help=help_text)

    parser.add_argument(
        "--gene_window_size", type=int, default=50000, help="Gene window size in base pairs."
    )
    parser.add_argument(
        "--enhancer_annotation_file", type=str, help="Path to enhancer annotation file (optional)."
    )

    enhancer_strategies = ["max_mkscore", "nearest_TSS"]
    parser.add_argument(
        "--snp_multiple_enhancer_strategy",
        type=str,
        choices=enhancer_strategies,
        default="max_mkscore",
        help="Strategy for handling multiple enhancers per SNP.",
    )
    priority_modes = ["gene_window_first", "enhancer_first", "enhancer_only"]
    parser.add_argument(
        "--gene_window_enhancer_priority",
        type=str,
        choices=priority_modes,
        help="Priority between gene window and enhancer annotations.",
    )

    for flag, default, help_text in (
        ("--spots_per_chunk", 1000, "Number of spots per chunk."),
        ("--ld_wind", 1, "LD window size."),
    ):
        parser.add_argument(flag, type=int, default=default, help=help_text)

    parser.add_argument(
        "--ld_unit",
        type=str,
        choices=["SNP", "KB", "CM"],
        default="CM",
        help="Unit for LD window.",
    )
    parser.add_argument(
        "--additional_baseline_annotation",
        type=str,
        default=None,
        help="Path of additional baseline annotations",
    )
138
233
 
139
234
 
140
235
def add_spatial_ldsc_args(parser):
    """Add the command-line options for the spatial LDSC step.

    Registers the shared options (through ``add_shared_args``), the required
    summary-statistics / weight-file / trait options, and the optional tuning
    options.

    ``--use_additional_baseline_annotation`` accepts an optional value. Given
    without a value, or omitted entirely, it is True. An explicit value is parsed
    as a boolean word, so ``--use_additional_baseline_annotation False`` now
    yields False. With the previous ``type=bool`` every non-empty string,
    including "False", evaluated to True.

    Args:
        parser: The argparse parser (or sub-parser) to extend in place.
    """

    def parse_bool(text):
        """Map a boolean word from the command line to a bool."""
        token = text.strip().lower()
        if token in ("true", "t", "yes", "y", "1"):
            return True
        if token in ("false", "f", "no", "n", "0"):
            return False
        raise argparse.ArgumentTypeError(
            f"'{text}' is not a valid boolean; use true/false, yes/no or 1/0."
        )

    add_shared_args(parser)
    parser.add_argument(
        "--sumstats_file", type=str, required=True, help="Path to GWAS summary statistics file."
    )
    parser.add_argument(
        "--w_file", type=str, required=True, help="Path to regression weight file."
    )
    parser.add_argument(
        "--trait_name", type=str, required=True, help="Name of the trait being analyzed."
    )
    parser.add_argument(
        "--n_blocks", type=int, default=200, help="Number of blocks for jackknife resampling."
    )
    parser.add_argument(
        "--chisq_max", type=int, help="Maximum chi-square value for filtering SNPs."
    )
    parser.add_argument(
        "--num_processes", type=int, default=4, help="Number of processes for parallel computing."
    )
    parser.add_argument(
        "--use_additional_baseline_annotation",
        type=parse_bool,
        nargs="?",
        const=True,
        default=True,
        help="Use additional baseline annotations when provided",
    )
149
263
 
150
264
 
151
265
def add_Cauchy_combination_args(parser):
    """Add the command-line options for the Cauchy combination step.

    ``--workdir``, ``--trait_name`` and ``--annotation`` are required.
    ``--sample_name``, ``--sample_name_list`` and ``--output_file`` are optional
    at the parser level.

    Args:
        parser: The argparse parser (or sub-parser) to extend in place.
    """

    def add_str(flag, required, help_text, **extra):
        parser.add_argument(flag, type=str, required=required, help=help_text, **extra)

    add_str("--workdir", True, "Path to the working directory.")
    add_str("--sample_name", False, "Name of the sample.")

    add_str("--trait_name", True, "Name of the trait being analyzed.")
    add_str("--annotation", True, "Name of the annotation in adata.obs to use.")

    add_str(
        "--sample_name_list",
        False,
        "List of sample names to process. Provide as a space-separated list.",
        nargs="+",
    )
    add_str(
        "--output_file",
        False,
        "Path to save the combined Cauchy results. Required when using multiple samples.",
    )
157
291
 
158
292
 
159
293
def add_report_args(parser):
    """Add the command-line options for report generation.

    Registers the shared options (through ``add_shared_args``), the required
    ``--trait_name``, ``--annotation`` and ``--sumstats_file`` options, and the
    optional gene-selection and figure-appearance options.

    Args:
        parser: The argparse parser (or sub-parser) to extend in place.
    """
    add_shared_args(parser)

    parser.add_argument(
        "--trait_name",
        type=str,
        required=True,
        help="Name of the trait to generate the report for.",
    )
    parser.add_argument("--annotation", type=str, required=True, help="Annotation layer name.")
    # parser.add_argument('--plot_type', type=str, choices=['manhattan', 'GSS', 'gsMap', 'all'], default='all',
    #                     help="Type of diagnostic plot to generate. Choose from 'manhattan', 'GSS', 'gsMap', or 'all'.")
    parser.add_argument(
        "--top_corr_genes", type=int, default=50, help="Number of top correlated genes to display."
    )
    parser.add_argument(
        "--selected_genes",
        type=str,
        nargs="*",
        help="List of specific genes to include in the report (optional).",
    )
    parser.add_argument(
        "--sumstats_file", type=str, required=True, help="Path to GWAS summary statistics file."
    )

    # Optional figure customisation: integer settings that default to None.
    figure_int_options = (
        ("--fig_width", "Width of the generated figures in pixels."),
        ("--fig_height", "Height of the generated figures in pixels."),
        ("--point_size", "Point size for the figures."),
    )
    for flag, help_text in figure_int_options:
        parser.add_argument(flag, type=int, default=None, help=help_text)

    parser.add_argument(
        "--fig_style",
        type=str,
        default="light",
        choices=["dark", "light"],
        help="Style of the generated figures.",
    )
332
+
333
+
334
def add_create_slice_mean_args(parser):
    """Add the command-line options for creating the slice mean.

    ``--sample_name_list``, ``--slice_mean_output_file`` and ``--data_layer`` are
    required. ``--h5ad_list``, ``--h5ad_yaml`` and ``--homolog_file`` are optional
    at the parser level.

    Args:
        parser: The argparse parser (or sub-parser) to extend in place.
    """
    parser.add_argument(
        "--sample_name_list",
        type=str,
        nargs="+",
        required=True,
        help="List of sample names to process. Provide as a space-separated list.",
    )
    parser.add_argument(
        "--h5ad_list",
        type=str,
        nargs="+",
        help="List of h5ad file paths corresponding to the sample names. Provide as a space-separated list.",
    )
    parser.add_argument(
        "--h5ad_yaml",
        type=str,
        default=None,
        help="Path to the YAML file containing sample names and associated h5ad file paths",
    )
    parser.add_argument(
        "--slice_mean_output_file",
        type=str,
        required=True,
        help="Path to the output file for the slice mean",
    )
    parser.add_argument(
        "--homolog_file", type=str, help="Path to homologous gene conversion file (optional)."
    )
    # --data_layer is required, so a default could never take effect; the former
    # default="counts" was dead and has been removed. Parsing is unchanged.
    parser.add_argument(
        "--data_layer",
        type=str,
        required=True,
        help='Data layer for gene expression (e.g., "count", "counts", "log1p").',
    )
371
+
177
372
 
178
373
def add_format_sumstats_args(parser):
    """Add the command-line options for formatting GWAS summary statistics.

    Option groups, in registration order:
      * required input/output paths (``--sumstats``, ``--out``);
      * optional column-name overrides, each defaulting to None;
      * options for converting SNP (chr, pos) to rsid;
      * output format and quality filters.

    ``--chunksize`` defaults to the integer 1000000. argparse applies ``type``
    only to command-line strings and string defaults, so the previous ``1e6``
    default reached callers as a float.

    Args:
        parser: The argparse parser (or sub-parser) to extend in place.
    """
    # Required arguments
    parser.add_argument("--sumstats", required=True, type=str, help="Path to gwas summary data")
    parser.add_argument(
        "--out", required=True, type=str, help="Path to save the formatted gwas data"
    )

    # Arguments for specify column name
    column_overrides = (
        ("--snp", "Name of snp column (if not a name that gsMap understands)"),
        ("--a1", "Name of effect allele column (if not a name that gsMap understands)"),
        ("--a2", "Name of none-effect allele column (if not a name that gsMap understands)"),
        ("--info", "Name of info column (if not a name that gsMap understands)"),
        ("--beta", "Name of gwas beta column (if not a name that gsMap understands)."),
        (
            "--se",
            "Name of gwas standar error of beta column (if not a name that gsMap understands)",
        ),
        ("--p", "Name of p-value column (if not a name that gsMap understands)"),
        ("--frq", "Name of A1 ferquency column (if not a name that gsMap understands)"),
        ("--n", "Name of sample size column (if not a name that gsMap understands)"),
        ("--z", "Name of gwas Z-statistics column (if not a name that gsMap understands)"),
        ("--OR", "Name of gwas OR column (if not a name that gsMap understands)"),
        ("--se_OR", "Name of standar error of OR column (if not a name that gsMap understands)"),
    )
    for flag, help_text in column_overrides:
        parser.add_argument(flag, default=None, type=str, help=help_text)

    # Arguments for convert SNP (chr, pos) to rsid
    parser.add_argument(
        "--chr",
        default="Chr",
        type=str,
        help="Name of SNP chromosome column (if not a name that gsMap understands)",
    )
    parser.add_argument(
        "--pos",
        default="Pos",
        type=str,
        help="Name of SNP positions column (if not a name that gsMap understands)",
    )
    parser.add_argument("--dbsnp", default=None, type=str, help="Path to reference dnsnp file")
    parser.add_argument(
        "--chunksize", default=1_000_000, type=int, help="Chunk size for loading dbsnp file"
    )

    # Arguments for output format and quality
    parser.add_argument(
        "--format",
        default="gsMap",
        type=str,
        help="Format of output data",
        choices=["gsMap", "COJO"],
    )
    parser.add_argument("--info_min", default=0.9, type=float, help="Minimum INFO score.")
    parser.add_argument("--maf_min", default=0.01, type=float, help="Minimum MAF.")
    parser.add_argument(
        "--keep_chr_pos",
        action="store_true",
        default=False,
        help="Keep SNP chromosome and position columns in the output data",
    )
488
+
230
489
 
231
490
def add_run_all_mode_args(parser):
    """Add the command-line options for the run-all mode.

    Registers the shared options (through ``add_shared_args``), the required
    resource/input options, and the optional GWAS, homolog, parallelism and
    latent-representation options.

    Args:
        parser: The argparse parser (or sub-parser) to extend in place.
    """
    add_shared_args(parser)

    # Required paths and configurations
    parser.add_argument(
        "--gsMap_resource_dir",
        type=str,
        required=True,
        help="Directory containing gsMap resources (e.g., genome annotations, LD reference panel, etc.).",
    )
    parser.add_argument(
        "--hdf5_path",
        type=str,
        required=True,
        help="Path to the input spatial transcriptomics data (H5AD format).",
    )
    parser.add_argument(
        "--annotation", type=str, required=True, help="Name of the annotation in adata.obs to use."
    )
    # --data_layer is required, so a default could never take effect; the former
    # default="counts" was dead and has been removed. Parsing is unchanged.
    parser.add_argument(
        "--data_layer",
        type=str,
        required=True,
        help='Data layer for gene expression (e.g., "count", "counts", "log1p").',
    )

    # GWAS Data Parameters
    parser.add_argument(
        "--trait_name",
        type=str,
        help="Name of the trait for GWAS analysis (required if sumstats_file is provided).",
    )
    parser.add_argument(
        "--sumstats_file",
        type=str,
        help="Path to GWAS summary statistics file. Either sumstats_file or sumstats_config_file is required.",
    )
    parser.add_argument(
        "--sumstats_config_file",
        type=str,
        help="Path to GWAS summary statistics config file. Either sumstats_file or sumstats_config_file is required.",
    )

    # Homolog Data Parameters
    parser.add_argument(
        "--homolog_file",
        type=str,
        help="Path to homologous gene for converting gene names from different species to human (optional, used for cross-species analysis).",
    )

    # Maximum number of processes
    parser.add_argument(
        "--max_processes",
        type=int,
        default=10,
        help="Maximum number of processes for parallel execution.",
    )

    parser.add_argument(
        "--latent_representation",
        type=str,
        default=None,
        help="Type of latent representation. This should exist in the h5ad obsm.",
    )
    parser.add_argument("--num_neighbour", type=int, default=21, help="Number of neighbors.")
    parser.add_argument(
        "--num_neighbour_spatial", type=int, default=101, help="Number of spatial neighbors."
    )
    parser.add_argument(
        "--gM_slices", type=str, default=None, help="Path to the slice mean file (optional)."
    )
272
562
 
273
563
 
274
564
  def ensure_path_exists(func):
@@ -288,75 +578,136 @@ def ensure_path_exists(func):
288
578
  @dataclass
289
579
  class ConfigWithAutoPaths:
290
580
  workdir: str
291
- sample_name: str
581
+ sample_name: str | None
292
582
 
293
583
  def __post_init__(self):
294
584
  if self.workdir is None:
295
- raise ValueError('workdir must be provided.')
585
+ raise ValueError("workdir must be provided.")
296
586
 
297
587
  @property
298
588
  @ensure_path_exists
299
589
  def hdf5_with_latent_path(self) -> Path:
300
- return Path(f'{self.workdir}/{self.sample_name}/find_latent_representations/{self.sample_name}_add_latent.h5ad')
590
+ return Path(
591
+ f"{self.workdir}/{self.sample_name}/find_latent_representations/{self.sample_name}_add_latent.h5ad"
592
+ )
301
593
 
302
594
  @property
303
595
  @ensure_path_exists
304
596
  def mkscore_feather_path(self) -> Path:
305
- return Path(f'{self.workdir}/{self.sample_name}/latent_to_gene/{self.sample_name}_gene_marker_score.feather')
597
+ return Path(
598
+ f"{self.workdir}/{self.sample_name}/latent_to_gene/{self.sample_name}_gene_marker_score.feather"
599
+ )
306
600
 
307
601
  @property
308
602
  @ensure_path_exists
309
603
  def ldscore_save_dir(self) -> Path:
310
- return Path(f'{self.workdir}/{self.sample_name}/generate_ldscore')
604
+ return Path(f"{self.workdir}/{self.sample_name}/generate_ldscore")
311
605
 
312
606
  @property
313
607
  @ensure_path_exists
314
608
  def ldsc_save_dir(self) -> Path:
315
- return Path(f'{self.workdir}/{self.sample_name}/spatial_ldsc')
609
+ return Path(f"{self.workdir}/{self.sample_name}/spatial_ldsc")
316
610
 
317
611
  @property
318
612
  @ensure_path_exists
319
613
  def cauchy_save_dir(self) -> Path:
320
- return Path(f'{self.workdir}/{self.sample_name}/cauchy_combination')
614
+ return Path(f"{self.workdir}/{self.sample_name}/cauchy_combination")
321
615
 
322
616
  @ensure_path_exists
323
617
  def get_report_dir(self, trait_name: str) -> Path:
324
- return Path(f'{self.workdir}/{self.sample_name}/report/{trait_name}')
618
+ return Path(f"{self.workdir}/{self.sample_name}/report/{trait_name}")
325
619
 
326
620
  def get_gsMap_report_file(self, trait_name: str) -> Path:
327
- return self.get_report_dir(trait_name) / f'{self.sample_name}_{trait_name}_gsMap_Report.html'
621
+ return (
622
+ self.get_report_dir(trait_name) / f"{self.sample_name}_{trait_name}_gsMap_Report.html"
623
+ )
328
624
 
329
625
  @ensure_path_exists
330
626
  def get_manhattan_html_plot_path(self, trait_name: str) -> Path:
331
627
  return Path(
332
- f'{self.workdir}/{self.sample_name}/report/{trait_name}/manhattan_plot/{self.sample_name}_{trait_name}_Diagnostic_Manhattan_Plot.html')
628
+ f"{self.workdir}/{self.sample_name}/report/{trait_name}/manhattan_plot/{self.sample_name}_{trait_name}_Diagnostic_Manhattan_Plot.html"
629
+ )
333
630
 
334
631
  @ensure_path_exists
335
632
  def get_GSS_plot_dir(self, trait_name: str) -> Path:
336
- return Path(f'{self.workdir}/{self.sample_name}/report/{trait_name}/GSS_plot')
633
+ return Path(f"{self.workdir}/{self.sample_name}/report/{trait_name}/GSS_plot")
337
634
 
338
635
  def get_GSS_plot_select_gene_file(self, trait_name: str) -> Path:
339
- return self.get_GSS_plot_dir(trait_name) / 'plot_genes.csv'
636
+ return self.get_GSS_plot_dir(trait_name) / "plot_genes.csv"
340
637
 
341
638
  @ensure_path_exists
342
639
  def get_ldsc_result_file(self, trait_name: str) -> Path:
343
- return Path(f'{self.ldsc_save_dir}/{self.sample_name}_{trait_name}.csv.gz')
640
+ return Path(f"{self.ldsc_save_dir}/{self.sample_name}_{trait_name}.csv.gz")
344
641
 
345
642
  @ensure_path_exists
346
643
  def get_cauchy_result_file(self, trait_name: str) -> Path:
347
- return Path(f'{self.cauchy_save_dir}/{self.sample_name}_{trait_name}.Cauchy.csv.gz')
644
+ return Path(f"{self.cauchy_save_dir}/{self.sample_name}_{trait_name}.Cauchy.csv.gz")
348
645
 
349
646
  @ensure_path_exists
350
647
  def get_gene_diagnostic_info_save_path(self, trait_name: str) -> Path:
351
648
  return Path(
352
- f'{self.workdir}/{self.sample_name}/report/{trait_name}/{self.sample_name}_{trait_name}_Gene_Diagnostic_Info.csv')
649
+ f"{self.workdir}/{self.sample_name}/report/{trait_name}/{self.sample_name}_{trait_name}_Gene_Diagnostic_Info.csv"
650
+ )
353
651
 
354
652
  @ensure_path_exists
355
653
  def get_gsMap_plot_save_dir(self, trait_name: str) -> Path:
356
- return Path(f'{self.workdir}/{self.sample_name}/report/{trait_name}/gsMap_plot')
654
+ return Path(f"{self.workdir}/{self.sample_name}/report/{trait_name}/gsMap_plot")
357
655
 
358
656
  def get_gsMap_html_plot_save_path(self, trait_name: str) -> Path:
359
- return self.get_gsMap_plot_save_dir(trait_name) / f'{self.sample_name}_{trait_name}_gsMap_plot.html'
657
+ return (
658
+ self.get_gsMap_plot_save_dir(trait_name)
659
+ / f"{self.sample_name}_{trait_name}_gsMap_plot.html"
660
+ )
661
+
662
+
663
+ @dataclass
664
+ class CreateSliceMeanConfig:
665
+ slice_mean_output_file: str | Path
666
+ h5ad_yaml: str | dict | None = None
667
+ sample_name_list: list | None = None
668
+ h5ad_list: list | None = None
669
+ homolog_file: str | None = None
670
+ species: str | None = None
671
+ data_layer: str = None
672
+
673
+ def __post_init__(self):
674
+ if self.h5ad_list is None and self.h5ad_yaml is None:
675
+ raise ValueError("At least one of --h5ad_list or --h5ad_yaml must be provided.")
676
+ if self.h5ad_yaml is not None:
677
+ if isinstance(self.h5ad_yaml, str):
678
+ logger.info(f"Reading h5ad yaml file: {self.h5ad_yaml}")
679
+ h5ad_dict = (
680
+ yaml.safe_load(open(self.h5ad_yaml))
681
+ if isinstance(self.h5ad_yaml, str)
682
+ else self.h5ad_yaml
683
+ )
684
+ elif self.sample_name_list and self.h5ad_list:
685
+ logger.info("Reading sample name list and h5ad list")
686
+ h5ad_dict = dict(zip(self.sample_name_list, self.h5ad_list, strict=False))
687
+ else:
688
+ raise ValueError(
689
+ "Please provide either h5ad_yaml or both sample_name_list and h5ad_list."
690
+ )
691
+
692
+ # check if sample names is unique
693
+ assert len(h5ad_dict) == len(set(h5ad_dict)), "Sample names must be unique."
694
+ assert len(h5ad_dict) > 1, "At least two samples are required."
695
+
696
+ logger.info(f"Input h5ad files: {h5ad_dict}")
697
+
698
+ # Check if all files exist
699
+ self.h5ad_dict = {}
700
+ for sample_name, h5ad_file in h5ad_dict.items():
701
+ h5ad_file = Path(h5ad_file)
702
+ if not h5ad_file.exists():
703
+ raise FileNotFoundError(f"{h5ad_file} does not exist.")
704
+ self.h5ad_dict[sample_name] = h5ad_file
705
+
706
+ self.slice_mean_output_file = Path(self.slice_mean_output_file)
707
+ self.slice_mean_output_file.parent.mkdir(parents=True, exist_ok=True)
708
+
709
+ verify_homolog_file_format(self)
710
+
360
711
 
361
712
  @dataclass
362
713
  class FindLatentRepresentationsConfig(ConfigWithAutoPaths):
@@ -389,24 +740,27 @@ class FindLatentRepresentationsConfig(ConfigWithAutoPaths):
389
740
  # self.output_hdf5_path = self.hdf5_with_latent_path
390
741
  if self.hierarchically:
391
742
  if self.annotation is None:
392
- raise ValueError('annotation must be provided if hierarchically is True.')
743
+ raise ValueError("annotation must be provided if hierarchically is True.")
393
744
  logger.info(
394
- f'------Hierarchical mode is enabled. This will find the latent representations within each annotation.')
745
+ "------Hierarchical mode is enabled. This will find the latent representations within each annotation."
746
+ )
395
747
 
396
748
  # remind for not providing annotation
397
749
  if self.annotation is None:
398
750
  logger.warning(
399
- 'annotation is not provided. This will find the latent representations for the whole dataset.')
751
+ "annotation is not provided. This will find the latent representations for the whole dataset."
752
+ )
400
753
  else:
401
- logger.info(f'------Find latent representations for {self.annotation}...')
754
+ logger.info(f"------Find latent representations for {self.annotation}...")
402
755
 
403
756
 
404
757
  @dataclass
405
758
  class LatentToGeneConfig(ConfigWithAutoPaths):
406
759
  # input_hdf5_with_latent_path: str
407
760
  # output_feather_path: str
761
+ input_hdf5_path: str | Path = None
408
762
  no_expression_fraction: bool = False
409
- latent_representation: str = 'latent_GVAE'
763
+ latent_representation: str = None
410
764
  num_neighbour: int = 21
411
765
  num_neighbour_spatial: int = 101
412
766
  homolog_file: str = None
@@ -415,31 +769,61 @@ class LatentToGeneConfig(ConfigWithAutoPaths):
415
769
  species: str = None
416
770
 
417
771
  def __post_init__(self):
418
- if self.homolog_file is not None:
419
- logger.info(f"User provided homolog file to map gene names to human: {self.homolog_file}")
420
- # check the format of the homolog file
421
- with open(self.homolog_file, 'r') as f:
422
- first_line = f.readline().strip()
423
- _n_col = len(first_line.split())
424
- if _n_col != 2:
425
- raise ValueError(
426
- f"Invalid homolog file format. Expected 2 columns, first column should be other species gene name, second column should be human gene name. "
427
- f"Got {_n_col} columns in the first line.")
428
- else:
429
- first_col_name, second_col_name = first_line.split()
430
- self.species = first_col_name
431
- logger.info(
432
- f"Homolog file provided and will map gene name from column1:{first_col_name} to column2:{second_col_name}")
772
+ if self.input_hdf5_path is None:
773
+ self.input_hdf5_path = self.hdf5_with_latent_path
774
+ assert self.input_hdf5_path.exists(), (
775
+ f"{self.input_hdf5_path} does not exist. Please run FindLatentRepresentations first."
776
+ )
433
777
  else:
434
- logger.info("No homolog file provided. Run in human mode.")
778
+ assert Path(self.input_hdf5_path).exists(), f"{self.input_hdf5_path} does not exist."
779
+ # copy to self.hdf5_with_latent_path
780
+ import shutil
781
+
782
+ shutil.copy2(self.input_hdf5_path, self.hdf5_with_latent_path)
783
+
784
+ if self.latent_representation is not None:
785
+ logger.info(f"Using the provided latent representation: {self.latent_representation}")
786
+ else:
787
+ self.latent_representation = "latent_GVAE"
788
+ logger.info(f"Using default latent representation: {self.latent_representation}")
789
+
790
+ if self.gM_slices is not None:
791
+ assert Path(self.gM_slices).exists(), f"{self.gM_slices} does not exist."
792
+ logger.info(f"Using the provided slice mean file: {self.gM_slices}.")
793
+
794
+ verify_homolog_file_format(self)
795
+
796
+
797
+ def verify_homolog_file_format(config):
798
+ if config.homolog_file is not None:
799
+ logger.info(
800
+ f"User provided homolog file to map gene names to human: {config.homolog_file}"
801
+ )
802
+ # check the format of the homolog file
803
+ with open(config.homolog_file) as f:
804
+ first_line = f.readline().strip()
805
+ _n_col = len(first_line.split())
806
+ if _n_col != 2:
807
+ raise ValueError(
808
+ f"Invalid homolog file format. Expected 2 columns, first column should be other species gene name, second column should be human gene name. "
809
+ f"Got {_n_col} columns in the first line."
810
+ )
811
+ else:
812
+ first_col_name, second_col_name = first_line.split()
813
+ config.species = first_col_name
814
+ logger.info(
815
+ f"Homolog file provided and will map gene name from column1:{first_col_name} to column2:{second_col_name}"
816
+ )
817
+ else:
818
+ logger.info("No homolog file provided. Run in human mode.")
435
819
 
436
820
 
437
821
  @dataclass
438
822
  class GenerateLDScoreConfig(ConfigWithAutoPaths):
439
- chrom: Union[int, str]
823
+ chrom: int | str
440
824
 
441
825
  bfile_root: str
442
- keep_snp_root: Optional[str]
826
+ keep_snp_root: str | None
443
827
 
444
828
  # annotation by gene distance
445
829
  gtf_annotation_file: str
@@ -447,74 +831,106 @@ class GenerateLDScoreConfig(ConfigWithAutoPaths):
447
831
 
448
832
  # annotation by enhancer
449
833
  enhancer_annotation_file: str = None
450
- snp_multiple_enhancer_strategy: Literal['max_mkscore', 'nearest_TSS'] = 'max_mkscore'
451
- gene_window_enhancer_priority: Optional[Literal['gene_window_first', 'enhancer_first', 'enhancer_only',]] = None
834
+ snp_multiple_enhancer_strategy: Literal["max_mkscore", "nearest_TSS"] = "max_mkscore"
835
+ gene_window_enhancer_priority: (
836
+ Literal["gene_window_first", "enhancer_first", "enhancer_only"] | None
837
+ ) = None
452
838
 
453
839
  # for calculating ld score
454
840
  additional_baseline_annotation: str = None
455
841
  spots_per_chunk: int = 1_000
456
842
  ld_wind: int = 1
457
- ld_unit: str = 'CM'
843
+ ld_unit: str = "CM"
458
844
 
459
845
  # zarr config
460
- ldscore_save_format: Literal['feather', 'zarr', 'quick_mode'] = 'feather'
846
+ ldscore_save_format: Literal["feather", "zarr", "quick_mode"] = "feather"
461
847
 
462
- zarr_chunk_size: Tuple[int, int] = None
848
+ zarr_chunk_size: tuple[int, int] = None
463
849
 
464
850
  # for pre calculating the SNP Gene ldscore Weight
465
851
  save_pre_calculate_snp_gene_weight_matrix: bool = False
466
852
 
467
- baseline_annotation_dir: Optional[str] = None
468
- SNP_gene_pair_dir: Optional[str] = None
853
+ baseline_annotation_dir: str | None = None
854
+ SNP_gene_pair_dir: str | None = None
855
+
469
856
  def __post_init__(self):
470
857
  # if self.mkscore_feather_file is None:
471
858
  # self.mkscore_feather_file = self._get_mkscore_feather_path()
472
859
 
473
- if self.enhancer_annotation_file is not None and self.gene_window_enhancer_priority is None:
474
- logger.warning("enhancer_annotation_file is provided but gene_window_enhancer_priority is not provided. "
475
- "by default, gene_window_enhancer_priority is set to 'enhancer_only', when enhancer_annotation_file is provided.")
476
- self.gene_window_enhancer_priority = 'enhancer_only'
477
- if self.enhancer_annotation_file is None and self.gene_window_enhancer_priority is not None:
478
- logger.warning("gene_window_enhancer_priority is provided but enhancer_annotation_file is not provided. "
479
- "by default, gene_window_enhancer_priority is set to None, when enhancer_annotation_file is not provided.")
860
+ if (
861
+ self.enhancer_annotation_file is not None
862
+ and self.gene_window_enhancer_priority is None
863
+ ):
864
+ logger.warning(
865
+ "enhancer_annotation_file is provided but gene_window_enhancer_priority is not provided. "
866
+ "by default, gene_window_enhancer_priority is set to 'enhancer_only', when enhancer_annotation_file is provided."
867
+ )
868
+ self.gene_window_enhancer_priority = "enhancer_only"
869
+ if (
870
+ self.enhancer_annotation_file is None
871
+ and self.gene_window_enhancer_priority is not None
872
+ ):
873
+ logger.warning(
874
+ "gene_window_enhancer_priority is provided but enhancer_annotation_file is not provided. "
875
+ "by default, gene_window_enhancer_priority is set to None, when enhancer_annotation_file is not provided."
876
+ )
480
877
  self.gene_window_enhancer_priority = None
481
- assert self.gene_window_enhancer_priority in [None, 'gene_window_first', 'enhancer_first', 'enhancer_only', ], \
878
+ assert self.gene_window_enhancer_priority in [
879
+ None,
880
+ "gene_window_first",
881
+ "enhancer_first",
882
+ "enhancer_only",
883
+ ], (
482
884
  f"gene_window_enhancer_priority must be one of None, 'gene_window_first', 'enhancer_first', 'enhancer_only', but got {self.gene_window_enhancer_priority}."
483
- if self.gene_window_enhancer_priority in ['gene_window_first', 'enhancer_first']:
484
- logger.info(f'Both gene_window and enhancer annotation will be used to calculate LD score. ')
885
+ )
886
+ if self.gene_window_enhancer_priority in ["gene_window_first", "enhancer_first"]:
485
887
  logger.info(
486
- f'SNP within +-{self.gene_window_size} bp of gene body will be used and enhancer annotation will be used to calculate LD score. If a snp maps to multiple enhancers, the strategy to choose by your select strategy: {self.snp_multiple_enhancer_strategy}.')
487
- elif self.gene_window_enhancer_priority == 'enhancer_only':
488
- logger.info(f'Only enhancer annotation will be used to calculate LD score. ')
888
+ "Both gene_window and enhancer annotation will be used to calculate LD score. "
889
+ )
890
+ logger.info(
891
+ f"SNP within +-{self.gene_window_size} bp of gene body will be used and enhancer annotation will be used to calculate LD score. If a snp maps to multiple enhancers, the strategy to choose by your select strategy: {self.snp_multiple_enhancer_strategy}."
892
+ )
893
+ elif self.gene_window_enhancer_priority == "enhancer_only":
894
+ logger.info("Only enhancer annotation will be used to calculate LD score. ")
489
895
  else:
490
896
  logger.info(
491
- f'Only gene window annotation will be used to calculate LD score. SNP within +-{self.gene_window_size} bp of gene body will be used. ')
897
+ f"Only gene window annotation will be used to calculate LD score. SNP within +-{self.gene_window_size} bp of gene body will be used. "
898
+ )
492
899
 
493
900
  # remind for baseline annotation
494
901
  if self.additional_baseline_annotation is None:
495
- logger.info(f'------Baseline annotation is not provided. Default baseline annotation will be used.')
902
+ logger.info(
903
+ "------Baseline annotation is not provided. Default baseline annotation will be used."
904
+ )
496
905
  else:
497
906
  logger.info(
498
- f'------Baseline annotation is provided. Additional baseline annotation will be used with the default baseline annotation.')
499
- logger.info(f'------Baseline annotation directory: {self.additional_baseline_annotation}')
907
+ "------Baseline annotation is provided. Additional baseline annotation will be used with the default baseline annotation."
908
+ )
909
+ logger.info(
910
+ f"------Baseline annotation directory: {self.additional_baseline_annotation}"
911
+ )
500
912
  # check the existence of baseline annotation
501
- if self.chrom == 'all':
913
+ if self.chrom == "all":
502
914
  for chrom in range(1, 23):
503
915
  chrom = str(chrom)
504
- baseline_annotation_path = Path(
505
- self.additional_baseline_annotation) / f'baseline.{chrom}.annot.gz'
916
+ baseline_annotation_path = (
917
+ Path(self.additional_baseline_annotation) / f"baseline.{chrom}.annot.gz"
918
+ )
506
919
  if not baseline_annotation_path.exists():
507
920
  raise FileNotFoundError(
508
- f'baseline.{chrom}.annot.gz is not found in {self.additional_baseline_annotation}.')
921
+ f"baseline.{chrom}.annot.gz is not found in {self.additional_baseline_annotation}."
922
+ )
509
923
  else:
510
- baseline_annotation_path = Path(
511
- self.additional_baseline_annotation) / f'baseline.{self.chrom}.annot.gz'
924
+ baseline_annotation_path = (
925
+ Path(self.additional_baseline_annotation) / f"baseline.{self.chrom}.annot.gz"
926
+ )
512
927
  if not baseline_annotation_path.exists():
513
928
  raise FileNotFoundError(
514
- f'baseline.{self.chrom}.annot.gz is not found in {self.additional_baseline_annotation}.')
929
+ f"baseline.{self.chrom}.annot.gz is not found in {self.additional_baseline_annotation}."
930
+ )
515
931
 
516
932
  # set the default zarr chunk size
517
- if self.ldscore_save_format == 'zarr' and self.zarr_chunk_size is None:
933
+ if self.ldscore_save_format == "zarr" and self.zarr_chunk_size is None:
518
934
  self.zarr_chunk_size = (10_000, self.spots_per_chunk)
519
935
 
520
936
 
@@ -523,56 +939,61 @@ class SpatialLDSCConfig(ConfigWithAutoPaths):
523
939
  w_file: str
524
940
  # ldscore_save_dir: str
525
941
  use_additional_baseline_annotation: bool = True
526
- trait_name: Optional[str] = None
527
- sumstats_file: Optional[str] = None
528
- sumstats_config_file: Optional[str] = None
942
+ trait_name: str | None = None
943
+ sumstats_file: str | None = None
944
+ sumstats_config_file: str | None = None
529
945
  num_processes: int = 4
530
946
  not_M_5_50: bool = False
531
947
  n_blocks: int = 200
532
- chisq_max: Optional[int] = None
533
- all_chunk: Optional[int] = None
534
- chunk_range: Optional[Tuple[int, int]] = None
948
+ chisq_max: int | None = None
949
+ all_chunk: int | None = None
950
+ chunk_range: tuple[int, int] | None = None
535
951
 
536
- ldscore_save_format: Literal['feather', 'zarr', 'quick_mode'] = 'feather'
952
+ ldscore_save_format: Literal["feather", "zarr", "quick_mode"] = "feather"
537
953
 
538
954
  spots_per_chunk_quick_mode: int = 1_000
539
- snp_gene_weight_adata_path: Optional[str] = None
955
+ snp_gene_weight_adata_path: str | None = None
540
956
 
541
957
  def __post_init__(self):
542
958
  super().__post_init__()
543
959
  if self.sumstats_file is None and self.sumstats_config_file is None:
544
- raise ValueError('One of sumstats_file and sumstats_config_file must be provided.')
960
+ raise ValueError("One of sumstats_file and sumstats_config_file must be provided.")
545
961
  if self.sumstats_file is not None and self.sumstats_config_file is not None:
546
- raise ValueError('Only one of sumstats_file and sumstats_config_file must be provided.')
962
+ raise ValueError(
963
+ "Only one of sumstats_file and sumstats_config_file must be provided."
964
+ )
547
965
  if self.sumstats_file is not None and self.trait_name is None:
548
- raise ValueError('trait_name must be provided if sumstats_file is provided.')
966
+ raise ValueError("trait_name must be provided if sumstats_file is provided.")
549
967
  if self.sumstats_config_file is not None and self.trait_name is not None:
550
- raise ValueError('trait_name must not be provided if sumstats_config_file is provided.')
968
+ raise ValueError(
969
+ "trait_name must not be provided if sumstats_config_file is provided."
970
+ )
551
971
  self.sumstats_config_dict = {}
552
972
  # load the sumstats config file
553
973
  if self.sumstats_config_file is not None:
554
974
  import yaml
975
+
555
976
  with open(self.sumstats_config_file) as f:
556
977
  config = yaml.load(f, Loader=yaml.FullLoader)
557
- for trait_name, sumstats_file in config.items():
558
- assert Path(sumstats_file).exists(), f'{sumstats_file} does not exist.'
978
+ for _trait_name, sumstats_file in config.items():
979
+ assert Path(sumstats_file).exists(), f"{sumstats_file} does not exist."
559
980
  # load the sumstats file
560
981
  elif self.sumstats_file is not None:
561
982
  self.sumstats_config_dict[self.trait_name] = self.sumstats_file
562
983
  else:
563
- raise ValueError('One of sumstats_file and sumstats_config_file must be provided.')
984
+ raise ValueError("One of sumstats_file and sumstats_config_file must be provided.")
564
985
 
565
986
  for sumstats_file in self.sumstats_config_dict.values():
566
- assert Path(sumstats_file).exists(), f'{sumstats_file} does not exist.'
987
+ assert Path(sumstats_file).exists(), f"{sumstats_file} does not exist."
567
988
 
568
989
  # check if additional baseline annotation is exist
569
990
  # self.use_additional_baseline_annotation = False
570
-
991
+
571
992
  if self.use_additional_baseline_annotation:
572
993
  self.process_additional_baseline_annotation()
573
994
 
574
995
  def process_additional_baseline_annotation(self):
575
- additional_baseline_annotation = Path(self.ldscore_save_dir) / 'additional_baseline'
996
+ additional_baseline_annotation = Path(self.ldscore_save_dir) / "additional_baseline"
576
997
  dir_exists = additional_baseline_annotation.exists()
577
998
 
578
999
  if not dir_exists:
@@ -580,7 +1001,7 @@ class SpatialLDSCConfig(ConfigWithAutoPaths):
580
1001
  # if self.use_additional_baseline_annotation:
581
1002
  # logger.warning(f"additional_baseline directory is not found in {self.ldscore_save_dir}.")
582
1003
  # print('''\
583
- # if you want to use additional baseline annotation,
1004
+ # if you want to use additional baseline annotation,
584
1005
  # please provide additional baseline annotation when calculating ld score.
585
1006
  # ''')
586
1007
  # raise FileNotFoundError(
@@ -589,15 +1010,21 @@ class SpatialLDSCConfig(ConfigWithAutoPaths):
589
1010
  # self.use_additional_baseline_annotation = self.use_additional_baseline_annotation or True
590
1011
  else:
591
1012
  logger.info(
592
- f'------Additional baseline annotation is provided. It will be used with the default baseline annotation.')
593
- logger.info(f'------Additional baseline annotation directory: {additional_baseline_annotation}')
1013
+ "------Additional baseline annotation is provided. It will be used with the default baseline annotation."
1014
+ )
1015
+ logger.info(
1016
+ f"------Additional baseline annotation directory: {additional_baseline_annotation}"
1017
+ )
594
1018
 
595
1019
  chrom_list = range(1, 23)
596
1020
  for chrom in chrom_list:
597
- baseline_annotation_path = additional_baseline_annotation / f'baseline.{chrom}.l2.ldscore.feather'
1021
+ baseline_annotation_path = (
1022
+ additional_baseline_annotation / f"baseline.{chrom}.l2.ldscore.feather"
1023
+ )
598
1024
  if not baseline_annotation_path.exists():
599
1025
  raise FileNotFoundError(
600
- f'baseline.{chrom}.annot.gz is not found in {additional_baseline_annotation}.')
1026
+ f"baseline.{chrom}.annot.gz is not found in {additional_baseline_annotation}."
1027
+ )
601
1028
  return None
602
1029
 
603
1030
 
@@ -605,8 +1032,25 @@ class SpatialLDSCConfig(ConfigWithAutoPaths):
605
1032
  class CauchyCombinationConfig(ConfigWithAutoPaths):
606
1033
  trait_name: str
607
1034
  annotation: str
608
- meta: str = None
609
- slide: str = None
1035
+ sample_name_list: list[str] = dataclasses.field(default_factory=list)
1036
+ output_file: str | Path | None = None
1037
+
1038
+ def __post_init__(self):
1039
+ if self.sample_name is not None:
1040
+ if len(self.sample_name_list) > 0:
1041
+ raise ValueError("Only one of sample_name and sample_name_list must be provided.")
1042
+ else:
1043
+ self.sample_name_list = [self.sample_name]
1044
+ self.output_file = (
1045
+ self.get_cauchy_result_file(self.trait_name)
1046
+ if self.output_file is None
1047
+ else self.output_file
1048
+ )
1049
+ else:
1050
+ assert len(self.sample_name_list) > 0, "At least one sample name must be provided."
1051
+ assert self.output_file is not None, (
1052
+ "Output_file must be provided if sample_name_list is provided."
1053
+ )
610
1054
 
611
1055
 
612
1056
  @dataclass
@@ -618,7 +1062,7 @@ class VisualizeConfig(ConfigWithAutoPaths):
618
1062
  fig_height: int = 600
619
1063
  fig_width: int = 800
620
1064
  point_size: int = None
621
- fig_style: Literal['dark', 'light'] = 'light'
1065
+ fig_style: Literal["dark", "light"] = "light"
622
1066
 
623
1067
 
624
1068
  @dataclass
@@ -628,22 +1072,26 @@ class DiagnosisConfig(ConfigWithAutoPaths):
628
1072
 
629
1073
  trait_name: str
630
1074
  sumstats_file: str
631
- plot_type: Literal['manhattan', 'GSS', 'gsMap', 'all'] = 'all'
1075
+ plot_type: Literal["manhattan", "GSS", "gsMap", "all"] = "all"
632
1076
  top_corr_genes: int = 50
633
- selected_genes: Optional[List[str]] = None
1077
+ selected_genes: list[str] | None = None
634
1078
 
635
- fig_width: Optional[int] = None
636
- fig_height: Optional[int] = None
637
- point_size: Optional[int] = None
638
- fig_style: Literal['dark', 'light'] = 'light'
1079
+ fig_width: int | None = None
1080
+ fig_height: int | None = None
1081
+ point_size: int | None = None
1082
+ fig_style: Literal["dark", "light"] = "light"
639
1083
 
640
1084
  def __post_init__(self):
641
1085
  if any([self.fig_width, self.fig_height, self.point_size]):
642
- logger.info('Customizing the figure size and point size.')
643
- assert all([self.fig_width, self.fig_height, self.point_size]), 'All of fig_width, fig_height, and point_size must be provided.'
1086
+ logger.info("Customizing the figure size and point size.")
1087
+ assert all([self.fig_width, self.fig_height, self.point_size]), (
1088
+ "All of fig_width, fig_height, and point_size must be provided."
1089
+ )
644
1090
  self.customize_fig = True
645
1091
  else:
646
1092
  self.customize_fig = False
1093
+
1094
+
647
1095
  @dataclass
648
1096
  class ReportConfig(DiagnosisConfig):
649
1097
  pass
@@ -656,57 +1104,78 @@ class RunAllModeConfig(ConfigWithAutoPaths):
656
1104
  # == ST DATA PARAMETERS ==
657
1105
  hdf5_path: str
658
1106
  annotation: str
659
- data_layer: str = 'X'
1107
+ data_layer: str = "X"
1108
+
1109
+ # == latent 2 Gene PARAMETERS ==
1110
+ gM_slices: str | None = None
1111
+ latent_representation: str = None
1112
+ num_neighbour: int = 21
1113
+ num_neighbour_spatial: int = 101
660
1114
 
661
1115
  # ==GWAS DATA PARAMETERS==
662
- trait_name: Optional[str] = None
663
- sumstats_file: Optional[str] = None
664
- sumstats_config_file: Optional[str] = None
1116
+ trait_name: str | None = None
1117
+ sumstats_file: str | None = None
1118
+ sumstats_config_file: str | None = None
665
1119
 
666
1120
  # === homolog PARAMETERS ===
667
- homolog_file: Optional[str] = None
1121
+ homolog_file: str | None = None
668
1122
 
669
1123
  max_processes: int = 10
670
1124
 
671
1125
  def __post_init__(self):
672
1126
  super().__post_init__()
673
- self.gtffile = f"{self.gsMap_resource_dir}/genome_annotation/gtf/gencode.v39lift37.annotation.gtf"
674
- self.bfile_root = f"{self.gsMap_resource_dir}/LD_Reference_Panel/1000G_EUR_Phase3_plink/1000G.EUR.QC"
1127
+ self.gtffile = (
1128
+ f"{self.gsMap_resource_dir}/genome_annotation/gtf/gencode.v39lift37.annotation.gtf"
1129
+ )
1130
+ self.bfile_root = (
1131
+ f"{self.gsMap_resource_dir}/LD_Reference_Panel/1000G_EUR_Phase3_plink/1000G.EUR.QC"
1132
+ )
675
1133
  self.keep_snp_root = f"{self.gsMap_resource_dir}/LDSC_resource/hapmap3_snps/hm"
676
1134
  self.w_file = f"{self.gsMap_resource_dir}/LDSC_resource/weights_hm3_no_hla/weights."
677
- self.snp_gene_weight_adata_path = f"{self.gsMap_resource_dir}/quick_mode/snp_gene_weight_matrix.h5ad"
678
- self.baseline_annotation_dir = Path(f"{self.gsMap_resource_dir}/quick_mode/baseline").resolve()
679
- self.SNP_gene_pair_dir = Path(f"{self.gsMap_resource_dir}/quick_mode/SNP_gene_pair").resolve()
1135
+ self.snp_gene_weight_adata_path = (
1136
+ f"{self.gsMap_resource_dir}/quick_mode/snp_gene_weight_matrix.h5ad"
1137
+ )
1138
+ self.baseline_annotation_dir = Path(
1139
+ f"{self.gsMap_resource_dir}/quick_mode/baseline"
1140
+ ).resolve()
1141
+ self.SNP_gene_pair_dir = Path(
1142
+ f"{self.gsMap_resource_dir}/quick_mode/SNP_gene_pair"
1143
+ ).resolve()
680
1144
  # check the existence of the input files and resources files
681
1145
  for file in [self.hdf5_path, self.gtffile]:
682
1146
  if not Path(file).exists():
683
1147
  raise FileNotFoundError(f"File {file} does not exist.")
684
1148
 
685
1149
  if self.sumstats_file is None and self.sumstats_config_file is None:
686
- raise ValueError('One of sumstats_file and sumstats_config_file must be provided.')
1150
+ raise ValueError("One of sumstats_file and sumstats_config_file must be provided.")
687
1151
  if self.sumstats_file is not None and self.sumstats_config_file is not None:
688
- raise ValueError('Only one of sumstats_file and sumstats_config_file must be provided.')
1152
+ raise ValueError(
1153
+ "Only one of sumstats_file and sumstats_config_file must be provided."
1154
+ )
689
1155
  if self.sumstats_file is not None and self.trait_name is None:
690
- raise ValueError('trait_name must be provided if sumstats_file is provided.')
1156
+ raise ValueError("trait_name must be provided if sumstats_file is provided.")
691
1157
  if self.sumstats_config_file is not None and self.trait_name is not None:
692
- raise ValueError('trait_name must not be provided if sumstats_config_file is provided.')
1158
+ raise ValueError(
1159
+ "trait_name must not be provided if sumstats_config_file is provided."
1160
+ )
693
1161
  self.sumstats_config_dict = {}
694
1162
  # load the sumstats config file
695
1163
  if self.sumstats_config_file is not None:
696
1164
  import yaml
1165
+
697
1166
  with open(self.sumstats_config_file) as f:
698
1167
  config = yaml.load(f, Loader=yaml.FullLoader)
699
1168
  for trait_name, sumstats_file in config.items():
700
- assert Path(sumstats_file).exists(), f'{sumstats_file} does not exist.'
1169
+ assert Path(sumstats_file).exists(), f"{sumstats_file} does not exist."
701
1170
  self.sumstats_config_dict[trait_name] = sumstats_file
702
1171
  # load the sumstats file
703
1172
  elif self.sumstats_file is not None and self.trait_name is not None:
704
1173
  self.sumstats_config_dict[self.trait_name] = self.sumstats_file
705
1174
  else:
706
- raise ValueError('One of sumstats_file and sumstats_config_file must be provided.')
1175
+ raise ValueError("One of sumstats_file and sumstats_config_file must be provided.")
707
1176
 
708
1177
  for sumstats_file in self.sumstats_config_dict.values():
709
- assert Path(sumstats_file).exists(), f'{sumstats_file} does not exist.'
1178
+ assert Path(sumstats_file).exists(), f"{sumstats_file} does not exist."
710
1179
 
711
1180
 
712
1181
  @dataclass
@@ -729,78 +1198,115 @@ class FormatSumstatsConfig:
729
1198
  format: str = None
730
1199
  chr: str = None
731
1200
  pos: str = None
732
- chunksize: int = 1e+7
1201
+ chunksize: int = 1e7
733
1202
  info_min: float = 0.9
734
1203
  maf_min: float = 0.01
735
1204
  keep_chr_pos: bool = False
736
1205
 
737
1206
 
738
@register_cli(
    name="run_find_latent_representations",
    description=(
        "Run Find_latent_representations \n"
        "Find the latent representations of each spot by running GNN-VAE"
    ),
    add_args_function=add_find_latent_representations_args,
)
def run_find_latent_representation_from_cli(args: argparse.Namespace):
    """Handle the ``run_find_latent_representations`` subcommand.

    Obtains a config object from
    ``get_dataclass_from_parser(args, FindLatentRepresentationsConfig)`` and
    passes it to ``run_find_latent_representation``.

    Args:
        args: Namespace produced by the argument parser for this subcommand.
    """
    # Imported inside the function, so the module is loaded only when this handler runs.
    from gsMap.find_latent_representation import run_find_latent_representation

    latent_config = get_dataclass_from_parser(args, FindLatentRepresentationsConfig)
    run_find_latent_representation(latent_config)
745
1217
 
746
1218
 
747
@register_cli(
    name="run_latent_to_gene",
    description=(
        "Run Latent_to_gene \n"
        "Estimate gene marker gene scores for each spot by using latent representations from nearby spots"
    ),
    add_args_function=add_latent_to_gene_args,
)
def run_latent_to_gene_from_cli(args: argparse.Namespace):
    """Handle the ``run_latent_to_gene`` subcommand.

    Obtains a config object from
    ``get_dataclass_from_parser(args, LatentToGeneConfig)`` and passes it to
    ``run_latent_to_gene``.

    Args:
        args: Namespace produced by the argument parser for this subcommand.
    """
    # Imported inside the function, so the module is loaded only when this handler runs.
    from gsMap.latent_to_gene import run_latent_to_gene

    run_latent_to_gene(get_dataclass_from_parser(args, LatentToGeneConfig))
754
1229
 
755
1230
 
756
@register_cli(
    name="run_generate_ldscore",
    description=(
        "Run Generate_ldscore \n"
        "Generate LD scores for each spot"
    ),
    add_args_function=add_generate_ldscore_args,
)
def run_generate_ldscore_from_cli(args: argparse.Namespace):
    """Handle the ``run_generate_ldscore`` subcommand.

    Obtains a config object from
    ``get_dataclass_from_parser(args, GenerateLDScoreConfig)`` and passes it
    to ``run_generate_ldscore``.

    Args:
        args: Namespace produced by the argument parser for this subcommand.
    """
    # Imported inside the function, so the module is loaded only when this handler runs.
    from gsMap.generate_ldscore import run_generate_ldscore

    ldscore_config = get_dataclass_from_parser(args, GenerateLDScoreConfig)
    run_generate_ldscore(ldscore_config)
763
1241
 
764
1242
 
765
@register_cli(
    name="run_spatial_ldsc",
    description=(
        "Run Spatial_ldsc \n"
        "Run spatial LDSC for each spot"
    ),
    add_args_function=add_spatial_ldsc_args,
)
def run_spatial_ldsc_from_cli(args: argparse.Namespace):
    """Handle the ``run_spatial_ldsc`` subcommand.

    Obtains a config object from
    ``get_dataclass_from_parser(args, SpatialLDSCConfig)`` and passes it to
    ``run_spatial_ldsc``, which is imported from
    ``gsMap.spatial_ldsc_multiple_sumstats``.

    Args:
        args: Namespace produced by the argument parser for this subcommand.
    """
    # Imported inside the function, so the module is loaded only when this handler runs.
    from gsMap.spatial_ldsc_multiple_sumstats import run_spatial_ldsc

    run_spatial_ldsc(get_dataclass_from_parser(args, SpatialLDSCConfig))
772
1253
 
773
1254
 
774
@register_cli(
    add_args_function=add_Cauchy_combination_args,
    description="Run Cauchy_combination for each annotation",
    name="run_cauchy_combination",
)
def run_Cauchy_combination_from_cli(args: argparse.Namespace):
    """Handle the ``run_cauchy_combination`` subcommand.

    Obtains a config object from
    ``get_dataclass_from_parser(args, CauchyCombinationConfig)`` and passes it
    to ``run_Cauchy_combination``.

    Args:
        args: Namespace produced by the argument parser for this subcommand.
    """
    # Imported inside the function, so the module is loaded only when this handler runs.
    from gsMap.cauchy_combination_test import run_Cauchy_combination

    cauchy_config = get_dataclass_from_parser(args, CauchyCombinationConfig)
    run_Cauchy_combination(cauchy_config)
781
1265
 
782
1266
 
783
@register_cli(
    add_args_function=add_report_args,
    description="Run Report to generate diagnostic plots and tables",
    name="run_report",
)
def run_Report_from_cli(args: argparse.Namespace):
    """Handle the ``run_report`` subcommand.

    Obtains a config object from
    ``get_dataclass_from_parser(args, ReportConfig)`` and passes it to
    ``run_report``.

    Args:
        args: Namespace produced by the argument parser for this subcommand.
    """
    # Imported inside the function, so the module is loaded only when this handler runs.
    from gsMap.report import run_report

    run_report(get_dataclass_from_parser(args, ReportConfig))
790
1277
 
791
1278
 
792
@register_cli(
    add_args_function=add_format_sumstats_args,
    description="Format gwas summary statistics",
    name="format_sumstats",
)
def gwas_format_from_cli(args: argparse.Namespace):
    """Handle the ``format_sumstats`` subcommand.

    Obtains a config object from
    ``get_dataclass_from_parser(args, FormatSumstatsConfig)`` and passes it to
    ``gwas_format``.

    Args:
        args: Namespace produced by the argument parser for this subcommand.
    """
    # Imported inside the function, so the module is loaded only when this handler runs.
    from gsMap.format_sumstats import gwas_format

    sumstats_format_config = get_dataclass_from_parser(args, FormatSumstatsConfig)
    gwas_format(sumstats_format_config)
799
1289
 
800
@register_cli(
    add_args_function=add_run_all_mode_args,
    description="Run all the gsMap pipeline in quick mode",
    name="quick_mode",
)
def run_all_mode_from_cli(args: argparse.Namespace):
    """Handle the ``quick_mode`` subcommand.

    Obtains a config object from
    ``get_dataclass_from_parser(args, RunAllModeConfig)`` and passes it to
    ``run_pipeline`` from ``gsMap.run_all_mode``.

    Args:
        args: Namespace produced by the argument parser for this subcommand.
    """
    # Imported inside the function, so the module is loaded only when this handler runs.
    from gsMap.run_all_mode import run_pipeline

    run_pipeline(get_dataclass_from_parser(args, RunAllModeConfig))
1301
+
1302
+
1303
@register_cli(
    add_args_function=add_create_slice_mean_args,
    description="Create slice mean from multiple h5ad files",
    name="create_slice_mean",
)
def create_slice_mean_from_cli(args: argparse.Namespace):
    """Handle the ``create_slice_mean`` subcommand.

    Obtains a config object from
    ``get_dataclass_from_parser(args, CreateSliceMeanConfig)`` and passes it
    to ``run_create_slice_mean``.

    Args:
        args: Namespace produced by the argument parser for this subcommand.
    """
    # Imported inside the function, so the module is loaded only when this handler runs.
    from gsMap.create_slice_mean import run_create_slice_mean

    slice_mean_config = get_dataclass_from_parser(args, CreateSliceMeanConfig)
    run_create_slice_mean(slice_mean_config)