gsMap3D 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. gsMap/__init__.py +13 -0
  2. gsMap/__main__.py +4 -0
  3. gsMap/cauchy_combination_test.py +342 -0
  4. gsMap/cli.py +355 -0
  5. gsMap/config/__init__.py +72 -0
  6. gsMap/config/base.py +296 -0
  7. gsMap/config/cauchy_config.py +79 -0
  8. gsMap/config/dataclasses.py +235 -0
  9. gsMap/config/decorators.py +302 -0
  10. gsMap/config/find_latent_config.py +276 -0
  11. gsMap/config/format_sumstats_config.py +54 -0
  12. gsMap/config/latent2gene_config.py +461 -0
  13. gsMap/config/ldscore_config.py +261 -0
  14. gsMap/config/quick_mode_config.py +242 -0
  15. gsMap/config/report_config.py +81 -0
  16. gsMap/config/spatial_ldsc_config.py +334 -0
  17. gsMap/config/utils.py +286 -0
  18. gsMap/find_latent/__init__.py +3 -0
  19. gsMap/find_latent/find_latent_representation.py +312 -0
  20. gsMap/find_latent/gnn/distribution.py +498 -0
  21. gsMap/find_latent/gnn/encoder_decoder.py +186 -0
  22. gsMap/find_latent/gnn/gcn.py +85 -0
  23. gsMap/find_latent/gnn/gene_former.py +164 -0
  24. gsMap/find_latent/gnn/loss.py +18 -0
  25. gsMap/find_latent/gnn/st_model.py +125 -0
  26. gsMap/find_latent/gnn/train_step.py +177 -0
  27. gsMap/find_latent/st_process.py +781 -0
  28. gsMap/format_sumstats.py +446 -0
  29. gsMap/generate_ldscore.py +1018 -0
  30. gsMap/latent2gene/__init__.py +18 -0
  31. gsMap/latent2gene/connectivity.py +781 -0
  32. gsMap/latent2gene/entry_point.py +141 -0
  33. gsMap/latent2gene/marker_scores.py +1265 -0
  34. gsMap/latent2gene/memmap_io.py +766 -0
  35. gsMap/latent2gene/rank_calculator.py +590 -0
  36. gsMap/latent2gene/row_ordering.py +182 -0
  37. gsMap/latent2gene/row_ordering_jax.py +159 -0
  38. gsMap/ldscore/__init__.py +1 -0
  39. gsMap/ldscore/batch_construction.py +163 -0
  40. gsMap/ldscore/compute.py +126 -0
  41. gsMap/ldscore/constants.py +70 -0
  42. gsMap/ldscore/io.py +262 -0
  43. gsMap/ldscore/mapping.py +262 -0
  44. gsMap/ldscore/pipeline.py +615 -0
  45. gsMap/pipeline/quick_mode.py +134 -0
  46. gsMap/report/__init__.py +2 -0
  47. gsMap/report/diagnosis.py +375 -0
  48. gsMap/report/report.py +100 -0
  49. gsMap/report/report_data.py +1832 -0
  50. gsMap/report/static/js_lib/alpine.min.js +5 -0
  51. gsMap/report/static/js_lib/tailwindcss.js +83 -0
  52. gsMap/report/static/template.html +2242 -0
  53. gsMap/report/three_d_combine.py +312 -0
  54. gsMap/report/three_d_plot/three_d_plot_decorate.py +246 -0
  55. gsMap/report/three_d_plot/three_d_plot_prepare.py +202 -0
  56. gsMap/report/three_d_plot/three_d_plots.py +425 -0
  57. gsMap/report/visualize.py +1409 -0
  58. gsMap/setup.py +5 -0
  59. gsMap/spatial_ldsc/__init__.py +0 -0
  60. gsMap/spatial_ldsc/io.py +656 -0
  61. gsMap/spatial_ldsc/ldscore_quick_mode.py +912 -0
  62. gsMap/spatial_ldsc/spatial_ldsc_jax.py +382 -0
  63. gsMap/spatial_ldsc/spatial_ldsc_multiple_sumstats.py +439 -0
  64. gsMap/utils/__init__.py +0 -0
  65. gsMap/utils/generate_r2_matrix.py +610 -0
  66. gsMap/utils/jackknife.py +518 -0
  67. gsMap/utils/manhattan_plot.py +643 -0
  68. gsMap/utils/regression_read.py +177 -0
  69. gsMap/utils/torch_utils.py +23 -0
  70. gsmap3d-0.1.0a1.dist-info/METADATA +168 -0
  71. gsmap3d-0.1.0a1.dist-info/RECORD +74 -0
  72. gsmap3d-0.1.0a1.dist-info/WHEEL +4 -0
  73. gsmap3d-0.1.0a1.dist-info/entry_points.txt +2 -0
  74. gsmap3d-0.1.0a1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,261 @@
1
+ """
2
+ Configuration dataclasses for the general LD score framework.
3
+ """
4
+
5
+ import logging
6
+ from dataclasses import dataclass
7
+ from pathlib import Path
8
+ from typing import Annotated
9
+
10
+ import typer
11
+
12
+ from gsMap.config.base import BaseConfig, ConfigWithAutoPaths
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
@dataclass
class LDScoreConfig(BaseConfig):
    """LD Score Weights Configuration.

    Declares the inputs of the general LD score framework. After
    construction, ``__post_init__`` normalizes ``output_dir``,
    ``chromosomes`` and ``bfile_root`` and checks that a PLINK ``.bed``
    file exists for every requested chromosome.
    """

    # Paths
    bfile_root: Annotated[str, typer.Option(
        help="Reference panel prefix template (e.g., 'data/1000G.{chr}')"
    )]

    hm3_snp_path: Annotated[Path, typer.Option(
        help="Path to HM3 SNP list",
        exists=True,
        file_okay=True,
        dir_okay=False,
        resolve_path=True
    )]

    # NOTE(review): the help text advertises a fallback directory, but
    # __post_init__ below raises ValueError when this is None. Confirm whether
    # a caller is expected to fill it in before the config is built.
    output_dir: Annotated[Path | None, typer.Option(
        help="Output directory. If None, uses {workdir}/{project_name}/generate_ldscore"
    )] = None

    output_filename: Annotated[str, typer.Option(
        help="Prefix for output files"
    )] = "ld_score_weights"

    # Omics Input
    omics_h5ad_path: Annotated[Path | None, typer.Option(
        help="Path to omics H5AD file",
        exists=True,
        file_okay=True,
        dir_okay=False,
        resolve_path=True
    )] = None

    # Mapping Input (Strategy A/B)
    mapping_type: Annotated[str, typer.Option(
        help="Mapping type: 'bed' or 'dict'"
    )] = "bed"

    mapping_file: Annotated[Path | None, typer.Option(
        help="Path to mapping file",
        exists=True,
        file_okay=True,
        dir_okay=False,
        resolve_path=True
    )] = None

    # Annotation Input (Strategy C - Direct Annotation Matrix)
    annot_file: Annotated[str | None, typer.Option(
        help="Template for annotation files (e.g., 'baseline.{chr}.annot.gz')"
    )] = None

    # Mapping Strategy parameters
    feature_window_size: Annotated[int, typer.Option(
        help="bp window for mapping (e.g. TSS window)"
    )] = 0

    strategy: Annotated[str, typer.Option(
        help="Strategy: 'score', 'tss', 'center', 'allow_repeat'"
    )] = "score"

    # LD Calculation parameters
    ld_wind: Annotated[float, typer.Option(
        help="LD window size"
    )] = 1.0

    ld_unit: Annotated[str, typer.Option(
        help="LD unit: 'SNP', 'KB', 'CM'"
    )] = "CM"

    maf_min: Annotated[float, typer.Option(
        help="Minimum MAF filter"
    )] = 0.01

    # Computation
    chromosomes: Annotated[str, typer.Option(
        help="Chromosomes to process. 'all' uses 1-22 autosomes, or provide a comma-separated list of chromosomes (e.g., '1,2,3')"
    )] = "all"

    batch_size_hm3: Annotated[int, typer.Option(
        help="Batch size for HM3 SNPs"
    )] = 50

    # w_ld Calculation
    calculate_w_ld: Annotated[bool, typer.Option(
        help="Whether to calculate w_ld"
    )] = False

    w_ld_dir: Annotated[Path | None, typer.Option(
        help="Directory for w_ld outputs"
    )] = None

    @staticmethod
    def _parse_chromosomes(spec: str) -> list[int]:
        """Turn a chromosome selection string into a list of ints.

        ``"all"`` (any case, surrounding whitespace ignored) selects the
        autosomes 1-22. Anything else is read as a comma-separated list;
        whitespace around tokens and empty tokens (e.g. a trailing comma)
        are ignored.

        Raises:
            ValueError: if no chromosome remains after parsing or a token
                is not an integer.
        """
        if spec.strip().lower() == "all":
            return list(range(1, 23))

        tokens = [token.strip() for token in spec.split(",")]
        tokens = [token for token in tokens if token]
        if not tokens:
            raise ValueError(
                f"No chromosomes specified in {spec!r}; "
                "use 'all' or a comma-separated list such as '1,2,3'."
            )
        try:
            return [int(token) for token in tokens]
        except ValueError as err:
            raise ValueError(
                f"Invalid chromosome list {spec!r}; "
                "use 'all' or a comma-separated list of integers such as '1,2,3'."
            ) from err

    def __post_init__(self):
        """
        Post-initialization processing:
        1. Require ``output_dir`` and coerce it to a Path.
        2. Parse the chromosome string to a list of ints.
        3. Fix the PLINK file template (append ``.{chr}`` when missing).
        4. Validate that each per-chromosome ``.bed`` file exists.

        Raises:
            ValueError: if ``output_dir`` is None or ``chromosomes`` is a
                string that cannot be parsed.
            FileNotFoundError: if any expected ``.bed`` file is missing.
        """
        # 1. output_dir is mandatory for this config
        if self.output_dir is None:
            raise ValueError("output_dir must be provided for LDScoreConfig.")
        self.output_dir = Path(self.output_dir)

        # 2. Parse chromosomes; a non-string value is assumed to be an
        #    already-parsed sequence and is left untouched.
        if isinstance(self.chromosomes, str):
            self.chromosomes = self._parse_chromosomes(self.chromosomes)

        # 3. Ensure bfile_root has the {chr} placeholder
        if "{chr}" not in self.bfile_root:
            logger.warning(
                f"The 'bfile_root' ({self.bfile_root}) does not contain the '{{chr}}' placeholder. "
                f"Appending '.{{chr}}' to the prefix."
            )
            self.bfile_root = f"{self.bfile_root}.{{chr}}"

        # 4. Validate PLINK files (only the .bed member is checked here)
        logger.info("Validating PLINK binary files based on template...")
        missing_paths = []

        for chrom in self.chromosomes:
            prefix = self.bfile_root.format(chr=chrom)
            bed_path = Path(f"{prefix}.bed")
            if not bed_path.exists():
                missing_paths.append(str(bed_path))

        if missing_paths:
            error_msg = (
                f"PLINK .bed files missing for {len(missing_paths)} chromosomes. "
                "The following files were not found:\n" + "\n".join(f" - {p}" for p in missing_paths)
            )
            raise FileNotFoundError(error_msg)

        logger.info(f"Confirmed all PLINK files exist for {len(self.chromosomes)} chromosomes.")

        self.show_config(LDScoreConfig)
160
+
161
+
162
@dataclass
class GenerateLDScoreConfig(ConfigWithAutoPaths):
    """Generate LDScore Configuration.

    Field declarations for the generate-LDScore step. Fields wrapped in
    ``typer.Option`` carry CLI metadata; the plain-annotated ones carry none.
    """

    # --- required inputs -------------------------------------------------
    workdir: Annotated[
        Path,
        typer.Option(
            help="Path to the working directory",
            exists=True,
            file_okay=False,
            dir_okay=True,
            resolve_path=True,
        ),
    ]

    chrom: Annotated[
        str,
        typer.Option(help='Chromosome id (1-22) or "all"'),
    ]

    bfile_root: Annotated[
        str,
        typer.Option(help="Root path for genotype plink bfiles (.bim, .bed, .fam)"),
    ]

    gtf_annotation_file: Annotated[
        Path,
        typer.Option(
            help="Path to GTF annotation file",
            exists=True,
            file_okay=True,
            dir_okay=False,
        ),
    ]

    # --- optional inputs -------------------------------------------------
    sample_name: Annotated[
        str | None,
        typer.Option(help="Name of the sample"),
    ] = None

    # Internal field: declared without CLI option metadata.
    keep_snp_root: str | None = None

    gene_window_size: Annotated[
        int,
        typer.Option(
            help="Gene window size in base pairs",
            min=1000,
            max=1000000,
        ),
    ] = 50000

    enhancer_annotation_file: Annotated[
        Path | None,
        typer.Option(
            help="Path to enhancer annotation file",
            exists=True,
            file_okay=True,
            dir_okay=False,
        ),
    ] = None

    snp_multiple_enhancer_strategy: Annotated[
        str,
        typer.Option(
            help="Strategy for handling multiple enhancers per SNP",
            case_sensitive=False,
        ),
    ] = "max_mkscore"

    gene_window_enhancer_priority: Annotated[
        str | None,
        typer.Option(help="Priority between gene window and enhancer annotations"),
    ] = None

    additional_baseline_annotation: Annotated[
        str | None,
        typer.Option(help="Path of additional baseline annotations"),
    ] = None

    spots_per_chunk: Annotated[
        int,
        typer.Option(
            help="Number of spots per chunk",
            min=100,
            max=10000,
        ),
    ] = 1000

    ld_wind: Annotated[
        int,
        typer.Option(
            help="LD window size",
            min=1,
            max=10,
        ),
    ] = 1

    ld_unit: Annotated[
        str,
        typer.Option(
            help="Unit for LD window",
            case_sensitive=False,
        ),
    ] = "CM"

    # --- additional fields (no CLI option metadata) ----------------------
    ldscore_save_format: str = "feather"
    save_pre_calculate_snp_gene_weight_matrix: bool = False
    baseline_annotation_dir: str | None = None
    SNP_gene_pair_dir: str | None = None

    def __post_init__(self):
        """Run the parent post-init hook, then display this config."""
        super().__post_init__()
        self.show_config(GenerateLDScoreConfig)
249
+
250
+
251
def check_ldscore_done(config: GenerateLDScoreConfig) -> bool:
    """
    Report whether the generate_ldscore step looks finished.

    The step counts as done when ``<config.ldscore_save_dir>/w_ld`` exists
    and contains at least one ``*.l2.ldscore.gz`` file.
    """
    weights_dir = Path(config.ldscore_save_dir) / "w_ld"
    if weights_dir.exists():
        # One matching LD score file is enough to call the step complete.
        return any(weights_dir.glob("*.l2.ldscore.gz"))
    return False
@@ -0,0 +1,242 @@
1
+ """
2
+ Configuration for Quick Mode pipeline.
3
+ """
4
+
5
+ import logging
6
+ from dataclasses import dataclass, fields
7
+ from pathlib import Path
8
+ from typing import Annotated
9
+
10
+ import typer
11
+
12
+ from gsMap.config.base import ConfigWithAutoPaths
13
+
14
+ from .cauchy_config import CauchyCombinationConfig
15
+
16
+ # Use relative imports to avoid circular dependency
17
+ from .find_latent_config import FindLatentRepresentationsConfig
18
+ from .latent2gene_config import DatasetType, LatentToGeneConfig
19
+ from .report_config import ReportConfig
20
+ from .spatial_ldsc_config import SpatialLDSCConfig
21
+
22
+ logger = logging.getLogger("gsMap.config")
23
+
24
@dataclass
class QuickModeConfig(ReportConfig, SpatialLDSCConfig, LatentToGeneConfig, FindLatentRepresentationsConfig, ConfigWithAutoPaths):
    """Quick Mode Pipeline Configuration.

    Combines the fields of the per-step configs and adds the
    ``start_step`` / ``stop_step`` controls. The ``*_config`` properties
    build a per-step config object from the values held on this instance.
    """
    __core_only__ = True

    # ------------------------------------------------------------------------
    # Pipeline Control
    # ------------------------------------------------------------------------
    start_step: Annotated[str, typer.Option(
        help="Step to start execution from (find_latent, latent2gene, spatial_ldsc, cauchy, report)",
        case_sensitive=False
    )] = "find_latent"

    stop_step: Annotated[str | None, typer.Option(
        help="Step to stop execution at (inclusive)",
        case_sensitive=False
    )] = None

    def __post_init__(self):
        """Initialize paths and derived lists, then apply quick-mode defaults.

        Only ``ConfigWithAutoPaths.__post_init__`` is invoked explicitly; the
        other bases' post-init hooks are not called from here.
        """
        ConfigWithAutoPaths.__post_init__(self)

        # Helpers defined outside this class body (presumably inherited).
        self._init_sumstats()
        self._init_annotation_list()

        if self.is_both_latent_and_gene_running:
            self.high_quality_neighbor_filter = self.high_quality_cell_qc

            # Use dual embeddings if both steps are running
            if self.latent_representation_niche is None:
                self.latent_representation_niche = "emb_niche"
            if self.latent_representation_cell is None:
                self.latent_representation_cell = "emb_cell"
        self.show_config(QuickModeConfig)

    @property
    def is_both_latent_and_gene_running(self) -> bool:
        """Check if both find_latent and latent2gene are in the execution range.

        Step names are compared case-insensitively, matching the
        ``case_sensitive=False`` declaration on the two options. An
        unrecognized step name yields False.
        """
        steps = ["find_latent", "latent2gene", "spatial_ldsc", "cauchy", "report"]

        def normalized(step):
            # Non-string values are passed through and rejected by index().
            return step.lower() if isinstance(step, str) else step

        try:
            start_idx = steps.index(normalized(self.start_step))
            stop_idx = steps.index(normalized(self.stop_step)) if self.stop_step else len(steps) - 1
        except ValueError:
            return False
        return start_idx <= 0 and stop_idx >= 1

    def _sub_config_kwargs(self, config_cls, *, only_present: bool = False) -> dict:
        """Collect this instance's values for the init fields of ``config_cls``.

        With ``only_present=True`` fields that are not attributes of this
        instance are skipped instead of raising AttributeError.
        """
        return {
            f.name: getattr(self, f.name)
            for f in fields(config_cls)
            if f.init and (not only_present or hasattr(self, f.name))
        }

    @property
    def find_latent_config(self) -> FindLatentRepresentationsConfig:
        """Build a FindLatentRepresentationsConfig from this instance's values."""
        return FindLatentRepresentationsConfig(**self._sub_config_kwargs(FindLatentRepresentationsConfig))

    @property
    def latent2gene_config(self) -> LatentToGeneConfig:
        """Build a LatentToGeneConfig from this instance's values."""
        return LatentToGeneConfig(**self._sub_config_kwargs(LatentToGeneConfig))

    @property
    def spatial_ldsc_config(self) -> SpatialLDSCConfig:
        """Build a SpatialLDSCConfig from this instance's values."""
        return SpatialLDSCConfig(**self._sub_config_kwargs(SpatialLDSCConfig))

    @property
    def report_config(self) -> ReportConfig:
        """Build a ReportConfig from the fields present on this instance."""
        return ReportConfig(**self._sub_config_kwargs(ReportConfig, only_present=True))

    @property
    def cauchy_config(self) -> CauchyCombinationConfig:
        """Build a CauchyCombinationConfig from the fields present on this instance."""
        return CauchyCombinationConfig(**self._sub_config_kwargs(CauchyCombinationConfig, only_present=True))
98
+
99
+
100
+
101
def check_report_done(config: QuickModeConfig, verbose: bool = False) -> bool:
    """
    Tell whether every expected report file is present.

    Args:
        config: Configuration passed through to ``get_report_missing_files``.
        verbose: When True and files are missing, log the count and up to
            the first ten missing paths.

    Returns:
        True if ``get_report_missing_files`` reports nothing missing.
    """
    data_gaps, web_gaps = get_report_missing_files(config)
    gaps = data_gaps + web_gaps

    if not gaps:
        return True

    if verbose:
        logger.info(f"Report incomplete. Missing {len(gaps)} files:")
        # Cap the listing at ten entries to keep the log readable.
        for path in gaps[:10]:
            logger.info(f" - {path}")
        overflow = len(gaps) - 10
        if overflow > 0:
            logger.info(f" ... and {overflow} more")

    return False
114
+
115
+
116
def get_report_missing_files(config: QuickModeConfig) -> tuple[list[Path], list[Path]]:
    """
    Collect the expected report outputs that do not exist on disk.

    Which files are expected depends on ``config.dataset_type``, the trait
    and annotation lists, the sample dictionary, the
    ``generate_multi_sample_plots`` flag and whether the concatenated
    latent AnnData file exists.

    Returns:
        Tuple of (missing_data_files, missing_web_files)
    """
    data_gaps: list[Path] = []
    web_gaps: list[Path] = []

    def record_absent(bucket, *candidates):
        # Append, in the order given, each candidate that is not on disk.
        bucket.extend(p for p in candidates if not p.exists())

    def slug(label):
        # Replace every non-alphanumeric character with an underscore.
        return "".join(ch if ch.isalnum() else "_" for ch in label)

    kind = config.dataset_type

    # === Web Report Files ===
    web_root = config.web_report_dir
    js_root = web_root / "js_data"
    record_absent(
        web_gaps,
        web_root / "index.html",
        web_root / "report_meta.json",
        js_root / "sample_index.js",
        js_root / "report_meta.js",
        js_root / "cauchy_results.js",
    )

    # === Data Files ===
    data_root = config.report_data_dir
    record_absent(
        data_gaps,
        data_root / "spot_metadata.csv",
        data_root / "gene_list.csv",
        data_root / "cauchy_results.csv",
    )

    # Per-trait files
    trait_names = config.trait_name_list
    annotations = config.annotation_list

    for trait in trait_names:
        record_absent(
            data_gaps,
            data_root / "gss_stats" / f"gene_trait_correlation_{trait}.csv",
            data_root / "manhattan_data" / f"{trait}_manhattan.csv",
        )
        record_absent(
            web_gaps,
            js_root / "gss_stats" / f"gene_trait_correlation_{trait}.js",
            js_root / f"manhattan_{trait}.js",
        )

    # Per-sample spatial JS files and gene diagnostic plots (Spatial only)
    if kind in (DatasetType.SPATIAL_2D, DatasetType.SPATIAL_3D):
        samples = config.sample_h5ad_dict
        if samples:
            record_absent(
                web_gaps,
                *(js_root / f"sample_{slug(name)}_spatial.js" for name in samples),
            )
        record_absent(web_gaps, web_root / "gene_diagnostic_plots")

    # 3D specific files (Spatial 3D only)
    if kind == DatasetType.SPATIAL_3D:
        data_3d = data_root / "spatial_3d"
        web_3d = web_root / "spatial_3d"

        record_absent(data_gaps, data_3d / "spatial_3d.h5ad")

        # One HTML plot per trait, then one per annotation
        record_absent(
            web_gaps,
            *(web_3d / f"spatial_3d_trait_{slug(trait)}.html" for trait in trait_names),
        )
        record_absent(
            web_gaps,
            *(web_3d / f"spatial_3d_anno_{slug(anno)}.html" for anno in annotations),
        )

    # Multi-sample plots (non-scRNA datasets, and only if enabled)
    if kind != DatasetType.SCRNA_SEQ and config.generate_multi_sample_plots:
        trait_png_root = web_root / "spatial_plots"
        anno_png_root = web_root / "annotation_plots"

        record_absent(
            web_gaps,
            *(trait_png_root / f"ldsc_{trait}.png" for trait in trait_names),
        )
        record_absent(
            web_gaps,
            *(anno_png_root / f"anno_{anno}.png" for anno in annotations),
        )

    # UMAP data (only expected when the concatenated latent AnnData exists)
    merged_adata = config.concatenated_latent_adata_path
    if merged_adata and merged_adata.exists():
        record_absent(data_gaps, data_root / "umap_data.csv")
        record_absent(web_gaps, js_root / "umap_data.js")

    return data_gaps, web_gaps
@@ -0,0 +1,81 @@
1
+ """
2
+ Configuration for generating reports.
3
+ """
4
+ import logging
5
+ from dataclasses import dataclass
6
+ from pathlib import Path
7
+ from typing import Annotated
8
+
9
+ import typer
10
+
11
+ from .base import ConfigWithAutoPaths, ensure_path_exists
12
+ from .cauchy_config import CauchyCombinationConfig
13
+
14
+ logger = logging.getLogger("gsMap.config")
15
+
16
@dataclass
class ReportConfig(CauchyCombinationConfig,ConfigWithAutoPaths):
    """Report Generation Configuration.

    Adds downsampling limits and plotting parameters on top of the Cauchy
    combination and auto-path configs.
    """

    # --- downsampling limits --------------------------------------------
    downsampling_n_spots_pcc: Annotated[
        int,
        typer.Option(
            help="Number of spots to downsample for PCC calculation if n_spots > this value",
            min=1000,
            max=100000,
        ),
    ] = 20000

    downsampling_n_spots_3d: Annotated[
        int,
        typer.Option(
            help="Number of spots to downsample for 3D visualization if n_spots > this value",
            min=1000,
            max=2000000,
        ),
    ] = 1000000

    downsampling_n_spots_2d: Annotated[
        int,
        typer.Option(
            help="Max spots per sample for 2D distribution plots. Samples with more spots will be randomly downsampled.",
            min=10000,
            max=500000,
        ),
    ] = 250000

    top_corr_genes: Annotated[
        int,
        typer.Option(
            help="Number of top correlated genes to display",
            min=1,
            max=500,
        ),
    ] = 50

    # --- advanced visualization parameters (no CLI option metadata) ------
    single_sample_multi_trait_max_cols: int = 5
    subsample_n_points: int | None = None
    single_sample_multi_trait_subplot_width_inches: float = 4.0
    single_sample_multi_trait_dpi: int = 300
    enable_pdf_output: bool = True
    hover_text_list: list | None = None
    single_trait_multi_sample_max_cols: int = 8
    single_trait_multi_sample_subplot_width_inches: float = 4.0
    single_trait_multi_sample_scaling_factor: float = 1.0
    single_trait_multi_sample_dpi: int = 300
    share_coords: bool = False

    # Whether to generate single-feature multi-sample plots
    # (LDSC, annotation, and gene diagnostic plots)
    generate_multi_sample_plots: bool = False

    # Plot origin for spatial plots ('upper' or 'lower')
    plot_origin: Annotated[
        str,
        typer.Option(
            help="Plot origin for spatial plots ('upper' or 'lower'). 'upper' will flip the y-axis (standard for images)."
        ),
    ] = "upper"

    # Legend marker size for categorical plots
    legend_marker_size: float = 10.0

    # Force re-run of report generation even if results exist
    force_report_re_run: bool = False

    # Compatibility property for visualization paths
    @property
    @ensure_path_exists
    def visualization_result_dir(self) -> Path:
        """Directory ``<project_dir>/report/<project_name>/<trait_name or "multi_trait">``."""
        report_root = self.project_dir / "report" / self.project_name
        # Fall back to a shared folder name when no single trait is set.
        return report_root / (self.trait_name or "multi_trait")

    def __post_init__(self):
        """Run the Cauchy config's post-init hook, then display this config."""
        CauchyCombinationConfig.__post_init__(self)
        self.show_config(ReportConfig)
81
+