smftools 0.3.1__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- smftools/_version.py +1 -1
- smftools/cli/chimeric_adata.py +1563 -0
- smftools/cli/helpers.py +18 -2
- smftools/cli/hmm_adata.py +18 -1
- smftools/cli/latent_adata.py +522 -67
- smftools/cli/load_adata.py +2 -2
- smftools/cli/preprocess_adata.py +32 -93
- smftools/cli/recipes.py +26 -0
- smftools/cli/spatial_adata.py +23 -109
- smftools/cli/variant_adata.py +423 -0
- smftools/cli_entry.py +41 -5
- smftools/config/conversion.yaml +0 -10
- smftools/config/deaminase.yaml +3 -0
- smftools/config/default.yaml +49 -13
- smftools/config/experiment_config.py +96 -3
- smftools/constants.py +4 -0
- smftools/hmm/call_hmm_peaks.py +1 -1
- smftools/informatics/binarize_converted_base_identities.py +2 -89
- smftools/informatics/converted_BAM_to_adata.py +53 -13
- smftools/informatics/h5ad_functions.py +83 -0
- smftools/informatics/modkit_extract_to_adata.py +4 -0
- smftools/plotting/__init__.py +26 -12
- smftools/plotting/autocorrelation_plotting.py +22 -4
- smftools/plotting/chimeric_plotting.py +1893 -0
- smftools/plotting/classifiers.py +28 -14
- smftools/plotting/general_plotting.py +58 -3362
- smftools/plotting/hmm_plotting.py +1586 -2
- smftools/plotting/latent_plotting.py +804 -0
- smftools/plotting/plotting_utils.py +243 -0
- smftools/plotting/position_stats.py +16 -8
- smftools/plotting/preprocess_plotting.py +281 -0
- smftools/plotting/qc_plotting.py +8 -3
- smftools/plotting/spatial_plotting.py +1134 -0
- smftools/plotting/variant_plotting.py +1231 -0
- smftools/preprocessing/__init__.py +3 -0
- smftools/preprocessing/append_base_context.py +1 -1
- smftools/preprocessing/append_mismatch_frequency_sites.py +35 -6
- smftools/preprocessing/append_sequence_mismatch_annotations.py +171 -0
- smftools/preprocessing/append_variant_call_layer.py +480 -0
- smftools/preprocessing/flag_duplicate_reads.py +4 -4
- smftools/preprocessing/invert_adata.py +1 -0
- smftools/readwrite.py +109 -85
- smftools/tools/__init__.py +6 -0
- smftools/tools/calculate_knn.py +121 -0
- smftools/tools/calculate_nmf.py +18 -7
- smftools/tools/calculate_pca.py +180 -0
- smftools/tools/calculate_umap.py +70 -154
- smftools/tools/position_stats.py +4 -4
- smftools/tools/rolling_nn_distance.py +640 -3
- smftools/tools/sequence_alignment.py +140 -0
- smftools/tools/tensor_factorization.py +52 -4
- {smftools-0.3.1.dist-info → smftools-0.3.2.dist-info}/METADATA +3 -1
- {smftools-0.3.1.dist-info → smftools-0.3.2.dist-info}/RECORD +56 -42
- {smftools-0.3.1.dist-info → smftools-0.3.2.dist-info}/WHEEL +0 -0
- {smftools-0.3.1.dist-info → smftools-0.3.2.dist-info}/entry_points.txt +0 -0
- {smftools-0.3.1.dist-info → smftools-0.3.2.dist-info}/licenses/LICENSE +0 -0
smftools/cli/load_adata.py
CHANGED
|
@@ -7,7 +7,7 @@ from typing import Iterable, Union
|
|
|
7
7
|
|
|
8
8
|
import numpy as np
|
|
9
9
|
|
|
10
|
-
from smftools.constants import
|
|
10
|
+
from smftools.constants import LOAD_DIR, LOGGING_DIR
|
|
11
11
|
from smftools.logging_utils import get_logger, setup_logging
|
|
12
12
|
|
|
13
13
|
from .helpers import AdataPaths
|
|
@@ -105,7 +105,7 @@ def load_adata(config_path: str):
|
|
|
105
105
|
from datetime import datetime
|
|
106
106
|
from importlib import resources
|
|
107
107
|
|
|
108
|
-
from ..readwrite import
|
|
108
|
+
from ..readwrite import make_dirs
|
|
109
109
|
from .helpers import get_adata_paths, load_experiment_config
|
|
110
110
|
|
|
111
111
|
# -----------------------------
|
smftools/cli/preprocess_adata.py
CHANGED
|
@@ -6,7 +6,13 @@ from typing import Optional, Tuple
|
|
|
6
6
|
|
|
7
7
|
import anndata as ad
|
|
8
8
|
|
|
9
|
-
from smftools.constants import
|
|
9
|
+
from smftools.constants import (
|
|
10
|
+
BASE_QUALITY_SCORES,
|
|
11
|
+
DEMUX_TYPE,
|
|
12
|
+
LOGGING_DIR,
|
|
13
|
+
PREPROCESS_DIR,
|
|
14
|
+
READ_SPAN_MASK,
|
|
15
|
+
)
|
|
10
16
|
from smftools.logging_utils import get_logger, setup_logging
|
|
11
17
|
|
|
12
18
|
logger = get_logger(__name__)
|
|
@@ -175,10 +181,6 @@ def preprocess_adata_core(
|
|
|
175
181
|
- `pp_adata_path` and `pp_dup_rem_adata_path` are the target output paths for
|
|
176
182
|
preprocessed and preprocessed+deduplicated AnnData.
|
|
177
183
|
|
|
178
|
-
Does NOT:
|
|
179
|
-
- Decide which stage to load from (that's the wrapper's job).
|
|
180
|
-
- Decide whether to skip entirely; it always runs its steps, but individual
|
|
181
|
-
sub-steps may skip based on `cfg.bypass_*` or directory existence.
|
|
182
184
|
|
|
183
185
|
Returns
|
|
184
186
|
-------
|
|
@@ -198,12 +200,10 @@ def preprocess_adata_core(
|
|
|
198
200
|
from ..plotting import (
|
|
199
201
|
plot_read_qc_histograms,
|
|
200
202
|
plot_read_span_quality_clustermaps,
|
|
201
|
-
plot_sequence_integer_encoding_clustermaps,
|
|
202
203
|
)
|
|
203
204
|
from ..preprocessing import (
|
|
204
205
|
append_base_context,
|
|
205
206
|
append_binary_layer_by_base_context,
|
|
206
|
-
append_mismatch_frequency_sites,
|
|
207
207
|
binarize_adata,
|
|
208
208
|
binarize_on_Youden,
|
|
209
209
|
calculate_complexity_II,
|
|
@@ -476,6 +476,22 @@ def preprocess_adata_core(
|
|
|
476
476
|
from_valid_sites_only=True,
|
|
477
477
|
)
|
|
478
478
|
|
|
479
|
+
# -----------------------------
|
|
480
|
+
# Optional inversion along positions axis
|
|
481
|
+
# -----------------------------
|
|
482
|
+
if getattr(cfg, "invert_adata", False):
|
|
483
|
+
adata = invert_adata(adata)
|
|
484
|
+
|
|
485
|
+
# -----------------------------
|
|
486
|
+
# Optional reindexing by reference
|
|
487
|
+
# -----------------------------
|
|
488
|
+
reindex_references_adata(
|
|
489
|
+
adata,
|
|
490
|
+
reference_col=cfg.reference_column,
|
|
491
|
+
offsets=cfg.reindexing_offsets,
|
|
492
|
+
new_col=cfg.reindexed_var_suffix,
|
|
493
|
+
)
|
|
494
|
+
|
|
479
495
|
############### Duplicate detection for conversion/deamination SMF ###############
|
|
480
496
|
if smf_modality != "direct":
|
|
481
497
|
references = adata.obs[cfg.reference_column].cat.categories
|
|
@@ -511,7 +527,7 @@ def preprocess_adata_core(
|
|
|
511
527
|
hierarchical_metric="euclidean",
|
|
512
528
|
hierarchical_window=cfg.duplicate_detection_window_size_for_hamming_neighbors,
|
|
513
529
|
demux_types=cfg.duplicate_detection_demux_types_to_use,
|
|
514
|
-
demux_col=
|
|
530
|
+
demux_col=DEMUX_TYPE,
|
|
515
531
|
)
|
|
516
532
|
|
|
517
533
|
# Use the flagged duplicate read groups and perform complexity analysis
|
|
@@ -537,96 +553,19 @@ def preprocess_adata_core(
|
|
|
537
553
|
adata_unique = adata
|
|
538
554
|
########################################################################################################################
|
|
539
555
|
|
|
540
|
-
# -----------------------------
|
|
541
|
-
# Optional inversion along positions axis
|
|
542
|
-
# -----------------------------
|
|
543
|
-
if getattr(cfg, "invert_adata", False):
|
|
544
|
-
adata = invert_adata(adata)
|
|
545
|
-
|
|
546
|
-
# -----------------------------
|
|
547
|
-
# Optional reindexing by reference
|
|
548
|
-
# -----------------------------
|
|
549
|
-
reindex_references_adata(
|
|
550
|
-
adata,
|
|
551
|
-
reference_col=cfg.reference_column,
|
|
552
|
-
offsets=cfg.reindexing_offsets,
|
|
553
|
-
new_col=cfg.reindexed_var_suffix,
|
|
554
|
-
)
|
|
555
|
-
|
|
556
|
-
############################################### Append mismatch frequency per position ###############################################
|
|
557
|
-
append_mismatch_frequency_sites(
|
|
558
|
-
adata_unique,
|
|
559
|
-
ref_column=cfg.reference_column,
|
|
560
|
-
mismatch_layer=cfg.mismatch_frequency_layer,
|
|
561
|
-
read_span_layer=cfg.mismatch_frequency_read_span_layer,
|
|
562
|
-
mismatch_frequency_range=cfg.mismatch_frequency_range,
|
|
563
|
-
bypass=cfg.bypass_append_mismatch_frequency_sites,
|
|
564
|
-
force_redo=cfg.force_redo_append_mismatch_frequency_sites,
|
|
565
|
-
)
|
|
566
|
-
|
|
567
|
-
############################################### Plot integer sequence encoding clustermaps ###############################################
|
|
568
|
-
if "sequence_integer_encoding" not in adata.layers:
|
|
569
|
-
logger.debug(
|
|
570
|
-
"sequence_integer_encoding layer not found; skipping integer encoding clustermaps."
|
|
571
|
-
)
|
|
572
|
-
else:
|
|
573
|
-
pp_seq_clustermap_dir = preprocess_directory / "06_sequence_integer_encoding_clustermaps"
|
|
574
|
-
if pp_seq_clustermap_dir.is_dir() and not cfg.force_redo_preprocessing:
|
|
575
|
-
logger.debug(
|
|
576
|
-
f"{pp_seq_clustermap_dir} already exists. Skipping sequence integer encoding clustermaps."
|
|
577
|
-
)
|
|
578
|
-
else:
|
|
579
|
-
make_dirs([pp_seq_clustermap_dir])
|
|
580
|
-
plot_sequence_integer_encoding_clustermaps(
|
|
581
|
-
adata,
|
|
582
|
-
sample_col=cfg.sample_name_col_for_plotting,
|
|
583
|
-
reference_col=cfg.reference_column,
|
|
584
|
-
demux_types=cfg.clustermap_demux_types_to_plot,
|
|
585
|
-
min_quality=None,
|
|
586
|
-
min_length=None,
|
|
587
|
-
min_mapped_length_to_reference_length_ratio=None,
|
|
588
|
-
sort_by="none",
|
|
589
|
-
max_unknown_fraction=0.5,
|
|
590
|
-
save_path=pp_seq_clustermap_dir,
|
|
591
|
-
show_position_axis=True,
|
|
592
|
-
)
|
|
593
|
-
|
|
594
|
-
pp_dedup_seq_clustermap_dir = (
|
|
595
|
-
preprocess_directory / "deduplicated" / "06_sequence_integer_encoding_clustermaps"
|
|
596
|
-
)
|
|
597
|
-
if pp_dedup_seq_clustermap_dir.is_dir() and not cfg.force_redo_preprocessing:
|
|
598
|
-
logger.debug(
|
|
599
|
-
f"{pp_dedup_seq_clustermap_dir} already exists. Skipping sequence integer encoding clustermaps."
|
|
600
|
-
)
|
|
601
|
-
else:
|
|
602
|
-
make_dirs([pp_dedup_seq_clustermap_dir])
|
|
603
|
-
plot_sequence_integer_encoding_clustermaps(
|
|
604
|
-
adata_unique,
|
|
605
|
-
sample_col=cfg.sample_name_col_for_plotting,
|
|
606
|
-
reference_col=cfg.reference_column,
|
|
607
|
-
demux_types=cfg.clustermap_demux_types_to_plot,
|
|
608
|
-
min_quality=None,
|
|
609
|
-
min_length=None,
|
|
610
|
-
min_mapped_length_to_reference_length_ratio=None,
|
|
611
|
-
sort_by="none",
|
|
612
|
-
max_unknown_fraction=0.5,
|
|
613
|
-
save_path=pp_dedup_seq_clustermap_dir,
|
|
614
|
-
show_position_axis=True,
|
|
615
|
-
)
|
|
616
|
-
|
|
617
556
|
############################################### Plot read span mask + base quality clustermaps ###############################################
|
|
618
557
|
quality_layer = None
|
|
619
|
-
if
|
|
620
|
-
quality_layer =
|
|
558
|
+
if BASE_QUALITY_SCORES in adata.layers:
|
|
559
|
+
quality_layer = BASE_QUALITY_SCORES
|
|
621
560
|
elif "base_qualities" in adata.layers:
|
|
622
561
|
quality_layer = "base_qualities"
|
|
623
562
|
|
|
624
|
-
if
|
|
563
|
+
if READ_SPAN_MASK not in adata.layers or quality_layer is None:
|
|
625
564
|
logger.debug(
|
|
626
565
|
"read_span_mask and base quality layers not found; skipping read span/base quality clustermaps."
|
|
627
566
|
)
|
|
628
567
|
else:
|
|
629
|
-
pp_span_quality_dir = preprocess_directory / "
|
|
568
|
+
pp_span_quality_dir = preprocess_directory / "06_read_span_and_quality_clustermaps"
|
|
630
569
|
if pp_span_quality_dir.is_dir() and not cfg.force_redo_preprocessing:
|
|
631
570
|
logger.debug(
|
|
632
571
|
f"{pp_span_quality_dir} already exists. Skipping read span/base quality clustermaps."
|
|
@@ -638,7 +577,7 @@ def preprocess_adata_core(
|
|
|
638
577
|
sample_col=cfg.sample_name_col_for_plotting,
|
|
639
578
|
reference_col=cfg.reference_column,
|
|
640
579
|
quality_layer=quality_layer,
|
|
641
|
-
read_span_layer=
|
|
580
|
+
read_span_layer=READ_SPAN_MASK,
|
|
642
581
|
demux_types=cfg.clustermap_demux_types_to_plot,
|
|
643
582
|
save_path=pp_span_quality_dir,
|
|
644
583
|
show_position_axis=True,
|
|
@@ -646,20 +585,20 @@ def preprocess_adata_core(
|
|
|
646
585
|
)
|
|
647
586
|
|
|
648
587
|
pp_dedup_span_quality_dir = (
|
|
649
|
-
preprocess_directory / "deduplicated" / "
|
|
588
|
+
preprocess_directory / "deduplicated" / "06_read_span_and_quality_clustermaps"
|
|
650
589
|
)
|
|
651
590
|
if pp_dedup_span_quality_dir.is_dir() and not cfg.force_redo_preprocessing:
|
|
652
591
|
logger.debug(
|
|
653
592
|
f"{pp_dedup_span_quality_dir} already exists. Skipping read span/base quality clustermaps."
|
|
654
593
|
)
|
|
655
|
-
elif quality_layer in adata_unique.layers and
|
|
594
|
+
elif quality_layer in adata_unique.layers and READ_SPAN_MASK in adata_unique.layers:
|
|
656
595
|
make_dirs([pp_dedup_span_quality_dir])
|
|
657
596
|
plot_read_span_quality_clustermaps(
|
|
658
597
|
adata_unique,
|
|
659
598
|
sample_col=cfg.sample_name_col_for_plotting,
|
|
660
599
|
reference_col=cfg.reference_column,
|
|
661
600
|
quality_layer=quality_layer,
|
|
662
|
-
read_span_layer=
|
|
601
|
+
read_span_layer=READ_SPAN_MASK,
|
|
663
602
|
demux_types=cfg.clustermap_demux_types_to_plot,
|
|
664
603
|
save_path=pp_dedup_span_quality_dir,
|
|
665
604
|
show_position_axis=True,
|
smftools/cli/recipes.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Optional, Tuple
|
|
5
|
+
|
|
6
|
+
import anndata as ad
|
|
7
|
+
|
|
8
|
+
from ..cli.chimeric_adata import chimeric_adata
|
|
9
|
+
from ..cli.hmm_adata import hmm_adata
|
|
10
|
+
from ..cli.latent_adata import latent_adata
|
|
11
|
+
from ..cli.load_adata import load_adata
|
|
12
|
+
from ..cli.preprocess_adata import preprocess_adata
|
|
13
|
+
from ..cli.spatial_adata import spatial_adata
|
|
14
|
+
from ..cli.variant_adata import variant_adata
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def full_flow(
|
|
18
|
+
config_path: str,
|
|
19
|
+
) -> Tuple[Optional[ad.AnnData], Optional[Path]]:
|
|
20
|
+
load_adata(config_path)
|
|
21
|
+
preprocess_adata(config_path)
|
|
22
|
+
spatial_adata(config_path)
|
|
23
|
+
variant_adata(config_path)
|
|
24
|
+
chimeric_adata(config_path)
|
|
25
|
+
hmm_adata(config_path)
|
|
26
|
+
latent_adata(config_path)
|
smftools/cli/spatial_adata.py
CHANGED
|
@@ -6,9 +6,8 @@ from typing import Optional, Tuple
|
|
|
6
6
|
|
|
7
7
|
import anndata as ad
|
|
8
8
|
|
|
9
|
-
from smftools.constants import LOGGING_DIR,
|
|
9
|
+
from smftools.constants import LOGGING_DIR, SPATIAL_DIR
|
|
10
10
|
from smftools.logging_utils import get_logger, setup_logging
|
|
11
|
-
from smftools.optional_imports import require
|
|
12
11
|
|
|
13
12
|
logger = get_logger(__name__)
|
|
14
13
|
|
|
@@ -36,7 +35,7 @@ def spatial_adata(
|
|
|
36
35
|
spatial_adata_path : Path | None
|
|
37
36
|
Path to the “current” spatial AnnData (or hmm AnnData if we skip to that).
|
|
38
37
|
"""
|
|
39
|
-
from ..readwrite import
|
|
38
|
+
from ..readwrite import safe_read_h5ad
|
|
40
39
|
from .helpers import get_adata_paths, load_experiment_config
|
|
41
40
|
|
|
42
41
|
# 1) Ensure config + basic paths via load_adata
|
|
@@ -47,7 +46,10 @@ def spatial_adata(
|
|
|
47
46
|
pp_path = paths.pp
|
|
48
47
|
pp_dedup_path = paths.pp_dedup
|
|
49
48
|
spatial_path = paths.spatial
|
|
49
|
+
chimeric_path = paths.chimeric
|
|
50
|
+
variant_path = paths.variant
|
|
50
51
|
hmm_path = paths.hmm
|
|
52
|
+
latent_path = paths.latent
|
|
51
53
|
|
|
52
54
|
# Stage-skipping logic for spatial
|
|
53
55
|
if not getattr(cfg, "force_redo_spatial_analyses", False):
|
|
@@ -65,9 +67,18 @@ def spatial_adata(
|
|
|
65
67
|
if hmm_path.exists():
|
|
66
68
|
start_adata = _load(hmm_path)
|
|
67
69
|
source_path = hmm_path
|
|
70
|
+
elif latent_path.exists():
|
|
71
|
+
start_adata = _load(latent_path)
|
|
72
|
+
source_path = latent_path
|
|
68
73
|
elif spatial_path.exists():
|
|
69
74
|
start_adata = _load(spatial_path)
|
|
70
75
|
source_path = spatial_path
|
|
76
|
+
elif chimeric_path.exists():
|
|
77
|
+
start_adata = _load(chimeric_path)
|
|
78
|
+
source_path = chimeric_path
|
|
79
|
+
elif variant_path.exists():
|
|
80
|
+
start_adata = _load(variant_path)
|
|
81
|
+
source_path = variant_path
|
|
71
82
|
elif pp_dedup_path.exists():
|
|
72
83
|
start_adata = _load(pp_dedup_path)
|
|
73
84
|
source_path = pp_dedup_path
|
|
@@ -140,7 +151,6 @@ def spatial_adata_core(
|
|
|
140
151
|
from ..plotting import (
|
|
141
152
|
combined_raw_clustermap,
|
|
142
153
|
plot_rolling_grid,
|
|
143
|
-
plot_rolling_nn_and_layer,
|
|
144
154
|
plot_spatial_autocorr_grid,
|
|
145
155
|
)
|
|
146
156
|
from ..preprocessing import (
|
|
@@ -149,12 +159,10 @@ def spatial_adata_core(
|
|
|
149
159
|
reindex_references_adata,
|
|
150
160
|
)
|
|
151
161
|
from ..readwrite import make_dirs, safe_read_h5ad
|
|
152
|
-
from ..tools import rolling_window_nn_distance
|
|
153
162
|
from ..tools.position_stats import (
|
|
154
163
|
compute_positionwise_statistics,
|
|
155
164
|
plot_positionwise_matrices,
|
|
156
165
|
)
|
|
157
|
-
from ..tools.rolling_nn_distance import assign_rolling_nn_results
|
|
158
166
|
from ..tools.spatial_autocorrelation import (
|
|
159
167
|
analyze_autocorr_matrix,
|
|
160
168
|
binary_autocorrelation_with_spacing,
|
|
@@ -227,13 +235,13 @@ def spatial_adata_core(
|
|
|
227
235
|
references = adata.obs[cfg.reference_column].cat.categories
|
|
228
236
|
|
|
229
237
|
# ============================================================
|
|
230
|
-
# 1) Clustermaps (non-direct modalities) on
|
|
238
|
+
# 1) Clustermaps (non-direct modalities) on preprocessed adata
|
|
231
239
|
# ============================================================
|
|
232
240
|
if smf_modality != "direct":
|
|
233
241
|
preprocessed_version_available = pp_adata_path.exists()
|
|
234
242
|
|
|
235
243
|
if preprocessed_version_available:
|
|
236
|
-
pp_clustermap_dir = spatial_directory / "
|
|
244
|
+
pp_clustermap_dir = spatial_directory / "01_clustermaps"
|
|
237
245
|
|
|
238
246
|
if pp_clustermap_dir.is_dir() and not getattr(
|
|
239
247
|
cfg, "force_redo_spatial_analyses", False
|
|
@@ -292,7 +300,7 @@ def spatial_adata_core(
|
|
|
292
300
|
min_mapped_length_to_reference_length_ratio=cfg.read_len_to_ref_ratio_filter_thresholds[
|
|
293
301
|
0
|
|
294
302
|
],
|
|
295
|
-
min_position_valid_fraction=cfg.
|
|
303
|
+
min_position_valid_fraction=1 - cfg.position_max_nan_threshold,
|
|
296
304
|
demux_types=cfg.clustermap_demux_types_to_plot,
|
|
297
305
|
bins=None,
|
|
298
306
|
sample_mapping=None,
|
|
@@ -303,10 +311,10 @@ def spatial_adata_core(
|
|
|
303
311
|
)
|
|
304
312
|
|
|
305
313
|
# ============================================================
|
|
306
|
-
# 2) Clustermaps on
|
|
314
|
+
# 2) Clustermaps on deduplicated preprocessed AnnDatas
|
|
307
315
|
# ============================================================
|
|
308
316
|
spatial_dir_dedup = spatial_directory / "deduplicated"
|
|
309
|
-
clustermap_dir_dedup = spatial_dir_dedup / "
|
|
317
|
+
clustermap_dir_dedup = spatial_dir_dedup / "01_clustermaps"
|
|
310
318
|
|
|
311
319
|
# Clustermaps on deduplicated adata
|
|
312
320
|
if clustermap_dir_dedup.is_dir() and not getattr(cfg, "force_redo_spatial_analyses", False):
|
|
@@ -343,104 +351,10 @@ def spatial_adata_core(
|
|
|
343
351
|
index_col_suffix=reindex_suffix,
|
|
344
352
|
)
|
|
345
353
|
|
|
346
|
-
# ============================================================
|
|
347
|
-
# 2b) Rolling NN distances + layer clustermaps
|
|
348
|
-
# ============================================================
|
|
349
|
-
pp_rolling_nn_dir = spatial_dir_dedup / "06b_rolling_nn_clustermaps"
|
|
350
|
-
|
|
351
|
-
if pp_rolling_nn_dir.is_dir() and not getattr(cfg, "force_redo_spatial_analyses", False):
|
|
352
|
-
logger.debug(f"{pp_rolling_nn_dir} already exists. Skipping rolling NN distance plots.")
|
|
353
|
-
else:
|
|
354
|
-
make_dirs([pp_rolling_nn_dir])
|
|
355
|
-
samples = (
|
|
356
|
-
adata.obs[cfg.sample_name_col_for_plotting].astype("category").cat.categories.tolist()
|
|
357
|
-
)
|
|
358
|
-
references = adata.obs[cfg.reference_column].astype("category").cat.categories.tolist()
|
|
359
|
-
|
|
360
|
-
for reference in references:
|
|
361
|
-
for sample in samples:
|
|
362
|
-
mask = (adata.obs[cfg.sample_name_col_for_plotting] == sample) & (
|
|
363
|
-
adata.obs[cfg.reference_column] == reference
|
|
364
|
-
)
|
|
365
|
-
if not mask.any():
|
|
366
|
-
continue
|
|
367
|
-
|
|
368
|
-
subset = adata[mask]
|
|
369
|
-
site_mask = (
|
|
370
|
-
adata.var[[f"{reference}_{st}_site" for st in cfg.rolling_nn_site_types]]
|
|
371
|
-
.fillna(False)
|
|
372
|
-
.any(axis=1)
|
|
373
|
-
)
|
|
374
|
-
subset = subset[:, site_mask].copy()
|
|
375
|
-
try:
|
|
376
|
-
rolling_values, rolling_starts = rolling_window_nn_distance(
|
|
377
|
-
subset,
|
|
378
|
-
layer=cfg.rolling_nn_layer,
|
|
379
|
-
window=cfg.rolling_nn_window,
|
|
380
|
-
step=cfg.rolling_nn_step,
|
|
381
|
-
min_overlap=cfg.rolling_nn_min_overlap,
|
|
382
|
-
return_fraction=cfg.rolling_nn_return_fraction,
|
|
383
|
-
store_obsm=cfg.rolling_nn_obsm_key,
|
|
384
|
-
)
|
|
385
|
-
except Exception as exc:
|
|
386
|
-
logger.warning(
|
|
387
|
-
"Rolling NN distance computation failed for sample=%s ref=%s: %s",
|
|
388
|
-
sample,
|
|
389
|
-
reference,
|
|
390
|
-
exc,
|
|
391
|
-
)
|
|
392
|
-
continue
|
|
393
|
-
|
|
394
|
-
safe_sample = str(sample).replace(os.sep, "_")
|
|
395
|
-
safe_ref = str(reference).replace(os.sep, "_")
|
|
396
|
-
parent_obsm_key = f"{cfg.rolling_nn_obsm_key}__{safe_ref}"
|
|
397
|
-
try:
|
|
398
|
-
assign_rolling_nn_results(
|
|
399
|
-
adata,
|
|
400
|
-
subset,
|
|
401
|
-
rolling_values,
|
|
402
|
-
rolling_starts,
|
|
403
|
-
obsm_key=parent_obsm_key,
|
|
404
|
-
window=cfg.rolling_nn_window,
|
|
405
|
-
step=cfg.rolling_nn_step,
|
|
406
|
-
min_overlap=cfg.rolling_nn_min_overlap,
|
|
407
|
-
return_fraction=cfg.rolling_nn_return_fraction,
|
|
408
|
-
layer=cfg.rolling_nn_layer,
|
|
409
|
-
)
|
|
410
|
-
except Exception as exc:
|
|
411
|
-
logger.warning(
|
|
412
|
-
"Failed to merge rolling NN results for sample=%s ref=%s: %s",
|
|
413
|
-
sample,
|
|
414
|
-
reference,
|
|
415
|
-
exc,
|
|
416
|
-
)
|
|
417
|
-
adata.uns.setdefault(f"{cfg.rolling_nn_obsm_key}_reference_map", {})[reference] = (
|
|
418
|
-
parent_obsm_key
|
|
419
|
-
)
|
|
420
|
-
out_png = pp_rolling_nn_dir / f"{safe_sample}__{safe_ref}.png"
|
|
421
|
-
title = f"{sample} {reference}"
|
|
422
|
-
try:
|
|
423
|
-
plot_rolling_nn_and_layer(
|
|
424
|
-
subset,
|
|
425
|
-
obsm_key=cfg.rolling_nn_obsm_key,
|
|
426
|
-
layer_key=cfg.rolling_nn_plot_layer,
|
|
427
|
-
max_nan_fraction=cfg.position_max_nan_threshold,
|
|
428
|
-
var_valid_fraction_col=f"{reference}_valid_fraction",
|
|
429
|
-
title=title,
|
|
430
|
-
save_name=out_png,
|
|
431
|
-
)
|
|
432
|
-
except Exception as exc:
|
|
433
|
-
logger.warning(
|
|
434
|
-
"Failed rolling NN plot for sample=%s ref=%s: %s",
|
|
435
|
-
sample,
|
|
436
|
-
reference,
|
|
437
|
-
exc,
|
|
438
|
-
)
|
|
439
|
-
|
|
440
354
|
# ============================================================
|
|
441
355
|
# 3) Spatial autocorrelation + rolling metrics
|
|
442
356
|
# ============================================================
|
|
443
|
-
pp_autocorr_dir = spatial_dir_dedup / "
|
|
357
|
+
pp_autocorr_dir = spatial_dir_dedup / "02_autocorrelations"
|
|
444
358
|
|
|
445
359
|
if pp_autocorr_dir.is_dir() and not getattr(cfg, "force_redo_spatial_analyses", False):
|
|
446
360
|
logger.debug(f"{pp_autocorr_dir} already exists. Skipping autocorrelation plotting.")
|
|
@@ -783,7 +697,7 @@ def spatial_adata_core(
|
|
|
783
697
|
# ============================================================
|
|
784
698
|
# 4) Pearson / correlation matrices
|
|
785
699
|
# ============================================================
|
|
786
|
-
corr_dir = spatial_dir_dedup / "
|
|
700
|
+
corr_dir = spatial_dir_dedup / "03_correlation_matrices"
|
|
787
701
|
|
|
788
702
|
if corr_dir.is_dir() and not getattr(cfg, "force_redo_spatial_analyses", False):
|
|
789
703
|
logger.debug(f"{corr_dir} already exists. Skipping correlation matrix plotting.")
|
|
@@ -816,10 +730,10 @@ def spatial_adata_core(
|
|
|
816
730
|
)
|
|
817
731
|
|
|
818
732
|
# ============================================================
|
|
819
|
-
#
|
|
733
|
+
# 4) Save spatial AnnData
|
|
820
734
|
# ============================================================
|
|
821
735
|
if (not spatial_adata_path.exists()) or getattr(cfg, "force_redo_spatial_analyses", False):
|
|
822
|
-
logger.info("Saving spatial analyzed AnnData
|
|
736
|
+
logger.info("Saving spatial analyzed AnnData.")
|
|
823
737
|
record_smftools_metadata(
|
|
824
738
|
adata,
|
|
825
739
|
step_name="spatial",
|