gsMap3D 0.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gsMap/__init__.py +13 -0
- gsMap/__main__.py +4 -0
- gsMap/cauchy_combination_test.py +342 -0
- gsMap/cli.py +355 -0
- gsMap/config/__init__.py +72 -0
- gsMap/config/base.py +296 -0
- gsMap/config/cauchy_config.py +79 -0
- gsMap/config/dataclasses.py +235 -0
- gsMap/config/decorators.py +302 -0
- gsMap/config/find_latent_config.py +276 -0
- gsMap/config/format_sumstats_config.py +54 -0
- gsMap/config/latent2gene_config.py +461 -0
- gsMap/config/ldscore_config.py +261 -0
- gsMap/config/quick_mode_config.py +242 -0
- gsMap/config/report_config.py +81 -0
- gsMap/config/spatial_ldsc_config.py +334 -0
- gsMap/config/utils.py +286 -0
- gsMap/find_latent/__init__.py +3 -0
- gsMap/find_latent/find_latent_representation.py +312 -0
- gsMap/find_latent/gnn/distribution.py +498 -0
- gsMap/find_latent/gnn/encoder_decoder.py +186 -0
- gsMap/find_latent/gnn/gcn.py +85 -0
- gsMap/find_latent/gnn/gene_former.py +164 -0
- gsMap/find_latent/gnn/loss.py +18 -0
- gsMap/find_latent/gnn/st_model.py +125 -0
- gsMap/find_latent/gnn/train_step.py +177 -0
- gsMap/find_latent/st_process.py +781 -0
- gsMap/format_sumstats.py +446 -0
- gsMap/generate_ldscore.py +1018 -0
- gsMap/latent2gene/__init__.py +18 -0
- gsMap/latent2gene/connectivity.py +781 -0
- gsMap/latent2gene/entry_point.py +141 -0
- gsMap/latent2gene/marker_scores.py +1265 -0
- gsMap/latent2gene/memmap_io.py +766 -0
- gsMap/latent2gene/rank_calculator.py +590 -0
- gsMap/latent2gene/row_ordering.py +182 -0
- gsMap/latent2gene/row_ordering_jax.py +159 -0
- gsMap/ldscore/__init__.py +1 -0
- gsMap/ldscore/batch_construction.py +163 -0
- gsMap/ldscore/compute.py +126 -0
- gsMap/ldscore/constants.py +70 -0
- gsMap/ldscore/io.py +262 -0
- gsMap/ldscore/mapping.py +262 -0
- gsMap/ldscore/pipeline.py +615 -0
- gsMap/pipeline/quick_mode.py +134 -0
- gsMap/report/__init__.py +2 -0
- gsMap/report/diagnosis.py +375 -0
- gsMap/report/report.py +100 -0
- gsMap/report/report_data.py +1832 -0
- gsMap/report/static/js_lib/alpine.min.js +5 -0
- gsMap/report/static/js_lib/tailwindcss.js +83 -0
- gsMap/report/static/template.html +2242 -0
- gsMap/report/three_d_combine.py +312 -0
- gsMap/report/three_d_plot/three_d_plot_decorate.py +246 -0
- gsMap/report/three_d_plot/three_d_plot_prepare.py +202 -0
- gsMap/report/three_d_plot/three_d_plots.py +425 -0
- gsMap/report/visualize.py +1409 -0
- gsMap/setup.py +5 -0
- gsMap/spatial_ldsc/__init__.py +0 -0
- gsMap/spatial_ldsc/io.py +656 -0
- gsMap/spatial_ldsc/ldscore_quick_mode.py +912 -0
- gsMap/spatial_ldsc/spatial_ldsc_jax.py +382 -0
- gsMap/spatial_ldsc/spatial_ldsc_multiple_sumstats.py +439 -0
- gsMap/utils/__init__.py +0 -0
- gsMap/utils/generate_r2_matrix.py +610 -0
- gsMap/utils/jackknife.py +518 -0
- gsMap/utils/manhattan_plot.py +643 -0
- gsMap/utils/regression_read.py +177 -0
- gsMap/utils/torch_utils.py +23 -0
- gsmap3d-0.1.0a1.dist-info/METADATA +168 -0
- gsmap3d-0.1.0a1.dist-info/RECORD +74 -0
- gsmap3d-0.1.0a1.dist-info/WHEEL +4 -0
- gsmap3d-0.1.0a1.dist-info/entry_points.txt +2 -0
- gsmap3d-0.1.0a1.dist-info/licenses/LICENSE +21 -0
gsMap/config/base.py
ADDED
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Base configuration classes and utilities for gsMap.
|
|
3
|
+
"""
|
|
4
|
+
import inspect
|
|
5
|
+
import logging
|
|
6
|
+
from dataclasses import asdict, dataclass
|
|
7
|
+
from enum import Enum
|
|
8
|
+
from functools import wraps
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Annotated, Any
|
|
11
|
+
|
|
12
|
+
import typer
|
|
13
|
+
import yaml
|
|
14
|
+
from rich.console import Console
|
|
15
|
+
from rich.logging import RichHandler
|
|
16
|
+
from rich.panel import Panel
|
|
17
|
+
from rich.syntax import Syntax
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def config_logger():
    """Configure and return the package-wide ``gsMap`` logger.

    Handlers already attached to the logger are removed, the logger level is
    set to DEBUG, and one rich console handler that emits INFO and above is
    installed.

    Returns:
        The configured ``logging.Logger`` named ``"gsMap"``.
    """
    gsmap_logger = logging.getLogger("gsMap")

    # Drop handlers from any earlier configuration so they do not stack up.
    if gsmap_logger.hasHandlers():
        gsmap_logger.handlers.clear()

    # The logger lets every record through; the handler decides what is shown.
    gsmap_logger.setLevel(logging.DEBUG)

    console_handler = RichHandler(
        console=Console(),
        show_time=True,
        show_path=False,
        rich_tracebacks=True,
        tracebacks_show_locals=True,
    )
    console_handler.setLevel(logging.INFO)
    message_format = logging.Formatter("{levelname:.5s} | {name} - {message}", style="{")
    console_handler.setFormatter(message_format)
    gsmap_logger.addHandler(console_handler)

    # NOTE: no file handler is configured here; only the console handler above
    # is attached to the logger.
    return gsmap_logger


# Configure logging as a side effect of importing this module.
config_logger()
|
|
66
|
+
|
|
67
|
+
def ensure_path_exists(func):
    """Decorate a path-returning callable so the location exists on disk.

    After ``func`` runs, a ``Path`` result is inspected: when it has a suffix
    it is treated as a file and its parent directory is created; otherwise the
    path itself is created as a directory. Results that are not ``Path``
    instances are passed through untouched.

    Args:
        func: Callable whose return value may be a ``pathlib.Path``.

    Returns:
        A wrapper that forwards all arguments to ``func`` and returns its
        result unchanged.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        produced = func(*args, **kwargs)
        if not isinstance(produced, Path):
            return produced

        # A suffix (e.g. ".csv") marks a file, so only its folder is created.
        directory = produced.parent if produced.suffix else produced
        directory.mkdir(parents=True, exist_ok=True, mode=0o755)
        return produced

    return wrapper
|
|
79
|
+
|
|
80
|
+
@dataclass
|
|
81
|
+
class BaseConfig:
|
|
82
|
+
"""Base configuration class with display and conversion utility."""
|
|
83
|
+
|
|
84
|
+
def to_dict_with_paths_as_strings(self) -> dict[str, Any]:
|
|
85
|
+
"""
|
|
86
|
+
Convert the config object to a dictionary with all Path objects converted to strings.
|
|
87
|
+
Also handles nested Path and Enum objects in dictionaries and lists.
|
|
88
|
+
|
|
89
|
+
Returns:
|
|
90
|
+
Dictionary representation of the config with all Path objects as strings
|
|
91
|
+
"""
|
|
92
|
+
# Convert config to dict
|
|
93
|
+
config_dict = asdict(self)
|
|
94
|
+
|
|
95
|
+
# Convert all Path and Enum objects in config to strings/values
|
|
96
|
+
for key, value in config_dict.items():
|
|
97
|
+
if isinstance(value, Path):
|
|
98
|
+
config_dict[key] = str(value)
|
|
99
|
+
elif isinstance(value, Enum):
|
|
100
|
+
config_dict[key] = value.value
|
|
101
|
+
elif isinstance(value, dict):
|
|
102
|
+
config_dict[key] = {
|
|
103
|
+
k: (str(v) if isinstance(v, Path) else (v.value if isinstance(v, Enum) else v))
|
|
104
|
+
for k, v in value.items()
|
|
105
|
+
}
|
|
106
|
+
elif isinstance(value, list):
|
|
107
|
+
config_dict[key] = [
|
|
108
|
+
(str(v) if isinstance(v, Path) else (v.value if isinstance(v, Enum) else v))
|
|
109
|
+
for v in value
|
|
110
|
+
]
|
|
111
|
+
|
|
112
|
+
return config_dict
|
|
113
|
+
|
|
114
|
+
def show_config(self, cls: type | None = None):
|
|
115
|
+
"""Show configuration in a nice way using rich."""
|
|
116
|
+
if cls is not None and type(self) is not cls:
|
|
117
|
+
return
|
|
118
|
+
|
|
119
|
+
config_dict = self.to_dict_with_paths_as_strings()
|
|
120
|
+
config_yaml = yaml.dump(config_dict, default_flow_style=False, sort_keys=False)
|
|
121
|
+
|
|
122
|
+
# Get title from docstring (first line)
|
|
123
|
+
doc = inspect.getdoc(type(self))
|
|
124
|
+
title = doc.split('\n')[0] if doc else "Configuration"
|
|
125
|
+
|
|
126
|
+
console = Console()
|
|
127
|
+
console.print(Panel(
|
|
128
|
+
Syntax(config_yaml, "yaml", theme="monokai", line_numbers=True),
|
|
129
|
+
title=f"[bold]{title}[/bold]",
|
|
130
|
+
expand=False
|
|
131
|
+
))
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
@dataclass
class ConfigWithAutoPaths(BaseConfig):
    """Base configuration class with automatic path generation.

    Every output location is derived from ``workdir / project_name``.
    Accessors wrapped in ``ensure_path_exists`` create the directory (or, for
    file paths, the parent directory) as a side effect of being read.
    """

    # Required from parent
    workdir: Annotated[Path, typer.Option(
        help="Path to the working directory",
        exists=True,
        file_okay=False,
        dir_okay=True,
        resolve_path=True
    )]

    project_name: Annotated[str, typer.Option(
        help="Name of the project"
    )]

    @property
    @ensure_path_exists
    def project_dir(self) -> Path:
        """Root directory of the project: ``workdir / project_name``."""
        return Path(self.workdir).joinpath(self.project_name)

    def __post_init__(self):
        """Reject a missing working directory."""
        if self.workdir is None:
            raise ValueError('workdir must be provided.')

    ## ---- Find latent representation paths
    @property
    @ensure_path_exists
    def latent_dir(self) -> Path:
        """Directory ``find_latent_representations`` under the project."""
        return self.project_dir.joinpath("find_latent_representations")

    @property
    @ensure_path_exists
    def model_path(self) -> Path:
        """File ``LGCN_model/gsMap_LGCN_.pt`` under the latent directory."""
        return self.latent_dir.joinpath('LGCN_model', 'gsMap_LGCN_.pt')

    @property
    def find_latent_metadata_path(self) -> Path:
        """File ``find_latent_metadata.yaml`` under the latent directory."""
        return self.latent_dir.joinpath('find_latent_metadata.yaml')

    ## ---- Latent to gene paths

    @property
    @ensure_path_exists
    def latent2gene_dir(self) -> Path:
        """Directory ``latent_to_gene`` under the project."""
        return self.project_dir.joinpath("latent_to_gene")

    @property
    def concatenated_latent_adata_path(self) -> Path:
        """File ``concatenated_latent_adata.h5ad`` in the latent-to-gene directory."""
        return self.latent2gene_dir.joinpath("concatenated_latent_adata.h5ad")

    @property
    def rank_memmap_path(self) -> Path:
        """File ``ranks.dat`` in the latent-to-gene directory."""
        return self.latent2gene_dir.joinpath("ranks.dat")

    @property
    def mean_frac_path(self) -> Path:
        """File ``mean_frac.parquet`` in the latent-to-gene directory."""
        return self.latent2gene_dir.joinpath("mean_frac.parquet")

    @property
    def marker_scores_memmap_path(self) -> Path:
        """File ``marker_scores.dat`` in the latent-to-gene directory."""
        return self.latent2gene_dir.joinpath("marker_scores.dat")

    @property
    def latent2gene_metadata_path(self) -> Path:
        """File ``metadata.yaml`` in the latent-to-gene directory."""
        return self.latent2gene_dir.joinpath("metadata.yaml")

    ## ---- Spatial LDSC paths

    @property
    @ensure_path_exists
    def ldsc_save_dir(self) -> Path:
        """Directory ``spatial_ldsc`` under the project."""
        return self.project_dir.joinpath("spatial_ldsc")

    def get_ldsc_result_file(self, trait_name: str) -> Path:
        """Return ``<ldsc_save_dir>/<project_name>_<trait_name>.csv.gz``."""
        file_name = f"{self.project_name}_{trait_name}.csv.gz"
        return Path(f"{self.ldsc_save_dir}/{file_name}")

    @property
    @ensure_path_exists
    def ldscore_save_dir(self) -> Path:
        """Directory ``generate_ldscore`` under the project."""
        return self.project_dir.joinpath("generate_ldscore")

    @property
    @ensure_path_exists
    def cauchy_save_dir(self) -> Path:
        """Directory ``cauchy_combination`` under the project."""
        return self.project_dir.joinpath("cauchy_combination")

    @property
    @ensure_path_exists
    def report_data_dir(self) -> Path:
        """Directory ``report_data`` under the project."""
        return self.project_dir.joinpath("report_data")

    @property
    @ensure_path_exists
    def web_report_dir(self) -> Path:
        """Directory ``gsmap_web_report`` under the project."""
        return self.project_dir.joinpath("gsmap_web_report")

    @property
    @ensure_path_exists
    def report_dir(self) -> Path:
        """Alias of ``web_report_dir``."""
        return self.web_report_dir

    @ensure_path_exists
    def get_report_dir(self, trait_name: str) -> Path:
        """Return ``report_dir``; ``trait_name`` is accepted but not used."""
        return self.report_dir

    def get_gsMap_report_file(self, trait_name: str) -> Path:
        """Return ``index.html`` inside ``report_dir``; ``trait_name`` is not used."""
        return self.report_dir.joinpath("index.html")

    @ensure_path_exists
    def get_manhattan_html_plot_path(self, trait_name: str) -> Path:
        """Return ``manhattan_data/<trait_name>_manhattan.csv`` inside ``report_data_dir``."""
        csv_name = f"{trait_name}_manhattan.csv"
        return self.report_data_dir.joinpath("manhattan_data", csv_name)

    @ensure_path_exists
    def get_GSS_plot_dir(self, trait_name: str) -> Path:
        """Return ``gene_diagnostic_plots`` inside ``report_dir``; ``trait_name`` is not used."""
        return self.report_dir.joinpath("gene_diagnostic_plots")

    def get_GSS_plot_select_gene_file(self, trait_name: str) -> Path:
        """Return ``plot_genes.csv`` inside the directory from ``get_GSS_plot_dir``."""
        plot_dir = self.get_GSS_plot_dir(trait_name)
        return plot_dir.joinpath("plot_genes.csv")

    @property
    def ldsc_combined_parquet_path(self) -> Path:
        """File ``<project_name>_combined_ldsc.parquet`` in ``cauchy_save_dir``."""
        return self.cauchy_save_dir / f"{self.project_name}_combined_ldsc.parquet"

    def get_cauchy_result_file(self, trait_name: str, annotation: str | None = None, all_samples: bool = False) -> Path:
        """Return the Cauchy result CSV path for a trait and annotation.

        Args:
            trait_name: Trait used in the file name.
            annotation: Annotation used in the file name. When ``None`` the
                instance attribute ``annotation`` is used if it exists,
                otherwise the literal ``'unknown'``.
            all_samples: Selects the ``.cauchy.csv`` name when true and the
                ``.sample_cauchy.csv`` name when false.

        Returns:
            A path inside ``cauchy_save_dir``.
        """
        if annotation is None:
            annotation = getattr(self, 'annotation', 'unknown')
        kind = "cauchy" if all_samples else "sample_cauchy"
        return self.cauchy_save_dir / f"{self.project_name}_{trait_name}.{annotation}.{kind}.csv"

    @ensure_path_exists
    def get_gene_diagnostic_info_save_path(self, trait_name: str) -> Path:
        """Return ``gss_stats/gene_trait_correlation_<trait_name>.csv`` inside ``report_data_dir``."""
        csv_name = f"gene_trait_correlation_{trait_name}.csv"
        return self.report_data_dir.joinpath("gss_stats", csv_name)

    @ensure_path_exists
    def get_gsMap_plot_save_dir(self, trait_name: str) -> Path:
        """Return ``spatial_plots`` inside ``report_dir``; ``trait_name`` is not used."""
        return self.report_dir.joinpath("spatial_plots")

    def get_gsMap_html_plot_save_path(self, trait_name: str) -> Path:
        """Return ``ldsc_<trait_name>.png`` inside the spatial plots directory."""
        plot_dir = self.get_gsMap_plot_save_dir(trait_name)
        return plot_dir / f"ldsc_{trait_name}.png"
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
from dataclasses import dataclass, field
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import Annotated
|
|
4
|
+
|
|
5
|
+
import typer
|
|
6
|
+
|
|
7
|
+
from .base import ConfigWithAutoPaths
|
|
8
|
+
from .spatial_ldsc_config import GWASSumstatsConfig
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class CauchyCombinationConfig(GWASSumstatsConfig,ConfigWithAutoPaths):
    """Cauchy Combination Configuration"""

    annotation: Annotated[str | None, typer.Option(
        help="Name of the annotation in adata.obs to use",
    )] = None

    cauchy_annotations: Annotated[list[str] | None, typer.Option(
        help="List of annotations in adata.obs to use",
    )] = field(default_factory=list)

    # Derived in __post_init__; not a constructor argument.
    annotation_list: list[str] = field(default_factory=list, init=False, repr=False)

    def __post_init__(self):
        """Run parent initialisation, derive ``annotation_list`` and print the config."""
        super().__post_init__()
        self._init_annotation_list()
        self.show_config(CauchyCombinationConfig)

    def _init_annotation_list(self):
        """Merge ``annotation`` and ``cauchy_annotations`` into ``annotation_list``.

        ``annotation`` comes first, followed by ``cauchy_annotations``;
        repeated names keep their first position only.

        Raises:
            ValueError: If both ``annotation`` and ``cauchy_annotations`` are empty.
        """
        if not (self.annotation or self.cauchy_annotations):
            raise ValueError("At least one of 'annotation' or 'cauchy_annotations' must be provided.")

        candidates = [self.annotation] if self.annotation else []
        candidates += self.cauchy_annotations or []
        # dict keys keep insertion order, which removes duplicates stably.
        self.annotation_list = list(dict.fromkeys(candidates))

    @property
    def ldsc_traits_result_path_dict(self) -> dict[str, Path]:
        """Map each name in ``trait_name_list`` to its LDSC result file.

        Raises:
            FileNotFoundError: If the result file of any trait is missing, or
                if no trait produced an entry.
        """
        located: dict[str, Path] = {}

        for trait in self.trait_name_list:
            ldsc_input_file = self.get_ldsc_result_file(trait)
            if not ldsc_input_file.exists():
                raise FileNotFoundError(f"LDSC result file not found for {trait}: {ldsc_input_file}")
            located[trait] = ldsc_input_file

        if located:
            return located
        raise FileNotFoundError(f"No valid LDSC result files found for the specified traits in {self.ldsc_save_dir}")
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def check_cauchy_done(config: CauchyCombinationConfig, trait_name: str) -> bool:
    """
    Report whether the cauchy step has produced all outputs for one trait.

    For every annotation in ``config.annotation_list`` both the
    ``all_samples=True`` and the ``all_samples=False`` result files must
    exist. An empty annotation list counts as not done.
    """
    annotations = config.annotation_list
    if not annotations:
        return False

    def _both_results_exist(annotation) -> bool:
        # Resolve both paths first, then test them on disk.
        annotation_level = config.get_cauchy_result_file(trait_name, annotation=annotation, all_samples=True)
        sample_level = config.get_cauchy_result_file(trait_name, annotation=annotation, all_samples=False)
        return annotation_level.exists() and sample_level.exists()

    return all(_both_results_exist(annotation) for annotation in annotations)
|
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Configuration dataclasses for gsMap commands.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Annotated
|
|
9
|
+
|
|
10
|
+
import typer
|
|
11
|
+
|
|
12
|
+
from .base import ConfigWithAutoPaths
|
|
13
|
+
|
|
14
|
+
# Import module-specific configs for orchestration
|
|
15
|
+
from .find_latent_config import FindLatentRepresentationsConfig
|
|
16
|
+
from .latent2gene_config import LatentToGeneConfig
|
|
17
|
+
from .spatial_ldsc_config import SpatialLDSCConfig
|
|
18
|
+
from .utils import (
|
|
19
|
+
verify_homolog_file_format,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
logger = logging.getLogger("gsMap.config")
|
|
23
|
+
|
|
24
|
+
@dataclass
class CreateSliceMeanConfig:
    """Configuration for creating slice mean from multiple h5ad files."""

    slice_mean_output_file: Annotated[Path, typer.Option(
        help="Path to the output file for the slice mean",
        dir_okay=False,
        resolve_path=True
    )]

    sample_name_list: Annotated[str | list[str], typer.Option(
        help="Space-separated list of sample names"
    )]

    h5ad_list: Annotated[str | list[str], typer.Option(
        help="Space-separated list of h5ad file paths"
    )]

    # Optional parameters
    h5ad_yaml: Annotated[Path | None, typer.Option(
        help="Path to YAML file with sample names and h5ad paths",
        exists=True,
        file_okay=True,
        dir_okay=False
    )] = None

    homolog_file: Annotated[Path | None, typer.Option(
        help="Path to homologous gene conversion file",
        exists=True,
        file_okay=True,
        dir_okay=False
    )] = None

    data_layer: Annotated[str, typer.Option(
        help="Data layer for gene expression"
    )] = "counts"

    species: str | None = None
    # Filled in by __post_init__: sample name -> Path of the h5ad file.
    h5ad_dict: dict | None = None

    def __post_init__(self):
        """Normalise the inputs and build ``h5ad_dict``.

        The sample-to-file mapping is taken from ``h5ad_yaml`` when it is
        given, otherwise from pairing ``sample_name_list`` with ``h5ad_list``.
        Every listed file must exist, and the parent directory of
        ``slice_mean_output_file`` is created.

        Raises:
            ValueError: If no usable combination of inputs is provided.
            AssertionError: If sample names are duplicated or fewer than two
                samples are given.
            FileNotFoundError: If a listed h5ad file does not exist.
        """
        # Parse lists if provided as strings
        if isinstance(self.sample_name_list, str):
            self.sample_name_list = self.sample_name_list.split()
        if isinstance(self.h5ad_list, str):
            self.h5ad_list = self.h5ad_list.split()

        if self.h5ad_list is None and self.h5ad_yaml is None:
            raise ValueError("At least one of --h5ad_list or --h5ad_yaml must be provided.")

        import yaml
        if self.h5ad_yaml is not None:
            if isinstance(self.h5ad_yaml, str | Path):
                logger.info(f"Reading h5ad yaml file: {self.h5ad_yaml}")
                with open(self.h5ad_yaml) as f:
                    h5ad_dict = yaml.safe_load(f)
            else:
                # Anything that is not a path is used directly as the mapping.
                h5ad_dict = self.h5ad_yaml
        elif self.sample_name_list and self.h5ad_list:
            logger.info("Reading sample name list and h5ad list")
            # Uniqueness has to be checked on the list: once the names are
            # dict keys, duplicates have already been silently collapsed.
            if len(set(self.sample_name_list)) != len(self.sample_name_list):
                raise AssertionError("Sample names must be unique.")
            if len(self.sample_name_list) != len(self.h5ad_list):
                logger.warning(
                    "sample_name_list has %d entries but h5ad_list has %d; "
                    "unpaired trailing entries are ignored.",
                    len(self.sample_name_list),
                    len(self.h5ad_list),
                )
            h5ad_dict = dict(zip(self.sample_name_list, self.h5ad_list, strict=False))
        else:
            raise ValueError(
                "Please provide either h5ad_yaml or both sample_name_list and h5ad_list."
            )

        # Raised explicitly (same exception type as the former ``assert``) so
        # the check is not stripped when Python runs with -O.
        if len(h5ad_dict) <= 1:
            raise AssertionError("At least two samples are required.")

        logger.info(f"Input h5ad files: {h5ad_dict}")

        # Check if all files exist
        self.h5ad_dict = {}
        for sample_name, h5ad_file in h5ad_dict.items():
            h5ad_file = Path(h5ad_file)
            if not h5ad_file.exists():
                raise FileNotFoundError(f"{h5ad_file} does not exist.")
            self.h5ad_dict[sample_name] = h5ad_file

        self.slice_mean_output_file = Path(self.slice_mean_output_file)
        self.slice_mean_output_file.parent.mkdir(parents=True, exist_ok=True)

        # Verify homolog file format if provided
        if self.homolog_file is not None:
            verify_homolog_file_format(self)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
@dataclass
class DiagnosisConfig(ConfigWithAutoPaths):
    """Configuration for diagnosis command.

    Adds trait, annotation and plotting options on top of the automatic
    project paths inherited from ``ConfigWithAutoPaths``.
    """
    trait_name: Annotated[str, typer.Option(help="Name of the trait")]
    annotation: Annotated[str, typer.Option(help="Annotation layer name")]

    sumstats_file: Annotated[Path | None, typer.Option(
        help="Path to GWAS summary statistics file",
        exists=True,
        file_okay=True,
        dir_okay=False,
        resolve_path=True
    )] = None

    top_corr_genes: Annotated[int, typer.Option(help="Number of top correlated genes to display")] = 50
    selected_genes: Annotated[str | None, typer.Option(help="Comma-separated list of specific genes to include")] = None

    fig_width: Annotated[int | None, typer.Option(help="Width of figures")] = None
    fig_height: Annotated[int | None, typer.Option(help="Height of figures")] = None
    point_size: Annotated[int | None, typer.Option(help="Point size")] = None

    plot_type: Annotated[str, typer.Option(help="Plot type (gsMap, manhattan, GSS, all)")] = "all"
    plot_origin: Annotated[str, typer.Option(help="Plot origin for spatial plots (upper or lower)")] = "upper"

    @property
    def customize_fig(self) -> bool:
        """True when any of ``fig_width``, ``fig_height`` or ``point_size`` is truthy."""
        return bool(self.fig_width or self.fig_height or self.point_size)

    @property
    def hdf5_with_latent_path(self) -> Path:
        """Alias of ``concatenated_latent_adata_path``."""
        return self.concatenated_latent_adata_path

    @property
    def mkscore_feather_path(self) -> Path:
        """File ``<project_name>_marker_score.feather`` in the latent-to-gene directory."""
        folder = self.latent2gene_dir
        return folder / f"{self.project_name}_marker_score.feather"
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
@dataclass
class VisualizeConfig(ConfigWithAutoPaths):
    """Configuration for visualization command.

    ``hdf5_with_latent_path`` and ``output_dir`` default to project-derived
    locations when they are left as ``None``.
    """
    trait_name: Annotated[str, typer.Option(help="Name of the trait")]
    annotation: Annotated[str | None, typer.Option(help="Annotation layer name")] = None

    fig_title: Annotated[str | None, typer.Option(help="Title for the figure")] = None
    fig_style: Annotated[str, typer.Option(help="Style of the figures (light/dark)")] = "light"
    point_size: Annotated[int | None, typer.Option(help="Point size")] = None
    fig_width: Annotated[int, typer.Option(help="Figure width")] = 800
    fig_height: Annotated[int, typer.Option(help="Figure height")] = 600

    output_dir: Annotated[Path | None, typer.Option(help="Directory to save output files")] = None
    hdf5_with_latent_path: Annotated[Path | None, typer.Option(help="Path to HDF5 with latent")] = None
    plot_origin: Annotated[str, typer.Option(help="Plot origin for spatial plots (upper or lower)")] = "upper"

    def __post_init__(self):
        """Run parent validation, then fill in unset paths with project defaults."""
        super().__post_init__()

        # The default is only computed when the user supplied nothing.
        self.hdf5_with_latent_path = (
            self.concatenated_latent_adata_path
            if self.hdf5_with_latent_path is None
            else self.hdf5_with_latent_path
        )
        self.output_dir = (
            self.get_report_dir(self.trait_name)
            if self.output_dir is None
            else self.output_dir
        )
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
@dataclass
|
|
176
|
+
class ThreeDCombineConfig:
|
|
177
|
+
workdir: str
|
|
178
|
+
trait_name: str | None = None
|
|
179
|
+
adata_3d: str | None = None
|
|
180
|
+
project_name: str | None = None
|
|
181
|
+
st_id: str | None = None
|
|
182
|
+
annotation: str | None = None
|
|
183
|
+
spatial_key: str = 'spatial'
|
|
184
|
+
cmap: str | None = None
|
|
185
|
+
point_size: float = 0.01
|
|
186
|
+
background_color: str = 'white'
|
|
187
|
+
n_snapshot: int = 200
|
|
188
|
+
show_outline: bool = False
|
|
189
|
+
save_mp4: bool = False
|
|
190
|
+
save_gif: bool = False
|
|
191
|
+
|
|
192
|
+
def __post_init__(self):
|
|
193
|
+
if self.workdir is None:
|
|
194
|
+
raise ValueError('workdir must be provided.')
|
|
195
|
+
work_dir = Path(self.workdir)
|
|
196
|
+
if self.project_name is not None:
|
|
197
|
+
self.project_dir = work_dir / self.project_name
|
|
198
|
+
else:
|
|
199
|
+
self.project_dir = work_dir
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
@dataclass
class RunLinkModeConfig(ConfigWithAutoPaths):
    """Configuration for running gsMap with linked mode.

    Placeholder: no link-mode specific fields are declared yet, so the class
    only carries what ``ConfigWithAutoPaths`` provides.
    """
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
@dataclass
class gsMapPipelineConfig(ConfigWithAutoPaths):
    """Unified configuration for the complete gsMap pipeline

    Holds one optional config object per pipeline stage; any stage left as
    ``None`` is constructed from this config's ``workdir`` and ``project_name``.
    """

    # Component configurations
    find_latent: FindLatentRepresentationsConfig | None = None
    latent2gene: LatentToGeneConfig | None = None
    spatial_ldsc: SpatialLDSCConfig | None = None

    def __post_init__(self):
        """Run parent validation, then build any stage config that was not supplied."""
        super().__post_init__()

        # Every stage shares the same project location.
        shared_location = {"workdir": self.workdir, "project_name": self.project_name}

        if self.find_latent is None:
            self.find_latent = FindLatentRepresentationsConfig(**shared_location)
        if self.latent2gene is None:
            self.latent2gene = LatentToGeneConfig(**shared_location)
        if self.spatial_ldsc is None:
            self.spatial_ldsc = SpatialLDSCConfig(**shared_location)
|