smftools 0.3.1__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. smftools/_version.py +1 -1
  2. smftools/cli/chimeric_adata.py +1563 -0
  3. smftools/cli/helpers.py +18 -2
  4. smftools/cli/hmm_adata.py +18 -1
  5. smftools/cli/latent_adata.py +522 -67
  6. smftools/cli/load_adata.py +2 -2
  7. smftools/cli/preprocess_adata.py +32 -93
  8. smftools/cli/recipes.py +26 -0
  9. smftools/cli/spatial_adata.py +23 -109
  10. smftools/cli/variant_adata.py +423 -0
  11. smftools/cli_entry.py +41 -5
  12. smftools/config/conversion.yaml +0 -10
  13. smftools/config/deaminase.yaml +3 -0
  14. smftools/config/default.yaml +49 -13
  15. smftools/config/experiment_config.py +96 -3
  16. smftools/constants.py +4 -0
  17. smftools/hmm/call_hmm_peaks.py +1 -1
  18. smftools/informatics/binarize_converted_base_identities.py +2 -89
  19. smftools/informatics/converted_BAM_to_adata.py +53 -13
  20. smftools/informatics/h5ad_functions.py +83 -0
  21. smftools/informatics/modkit_extract_to_adata.py +4 -0
  22. smftools/plotting/__init__.py +26 -12
  23. smftools/plotting/autocorrelation_plotting.py +22 -4
  24. smftools/plotting/chimeric_plotting.py +1893 -0
  25. smftools/plotting/classifiers.py +28 -14
  26. smftools/plotting/general_plotting.py +58 -3362
  27. smftools/plotting/hmm_plotting.py +1586 -2
  28. smftools/plotting/latent_plotting.py +804 -0
  29. smftools/plotting/plotting_utils.py +243 -0
  30. smftools/plotting/position_stats.py +16 -8
  31. smftools/plotting/preprocess_plotting.py +281 -0
  32. smftools/plotting/qc_plotting.py +8 -3
  33. smftools/plotting/spatial_plotting.py +1134 -0
  34. smftools/plotting/variant_plotting.py +1231 -0
  35. smftools/preprocessing/__init__.py +3 -0
  36. smftools/preprocessing/append_base_context.py +1 -1
  37. smftools/preprocessing/append_mismatch_frequency_sites.py +35 -6
  38. smftools/preprocessing/append_sequence_mismatch_annotations.py +171 -0
  39. smftools/preprocessing/append_variant_call_layer.py +480 -0
  40. smftools/preprocessing/flag_duplicate_reads.py +4 -4
  41. smftools/preprocessing/invert_adata.py +1 -0
  42. smftools/readwrite.py +109 -85
  43. smftools/tools/__init__.py +6 -0
  44. smftools/tools/calculate_knn.py +121 -0
  45. smftools/tools/calculate_nmf.py +18 -7
  46. smftools/tools/calculate_pca.py +180 -0
  47. smftools/tools/calculate_umap.py +70 -154
  48. smftools/tools/position_stats.py +4 -4
  49. smftools/tools/rolling_nn_distance.py +640 -3
  50. smftools/tools/sequence_alignment.py +140 -0
  51. smftools/tools/tensor_factorization.py +52 -4
  52. {smftools-0.3.1.dist-info → smftools-0.3.2.dist-info}/METADATA +3 -1
  53. {smftools-0.3.1.dist-info → smftools-0.3.2.dist-info}/RECORD +56 -42
  54. {smftools-0.3.1.dist-info → smftools-0.3.2.dist-info}/WHEEL +0 -0
  55. {smftools-0.3.1.dist-info → smftools-0.3.2.dist-info}/entry_points.txt +0 -0
  56. {smftools-0.3.1.dist-info → smftools-0.3.2.dist-info}/licenses/LICENSE +0 -0
@@ -7,7 +7,7 @@ from typing import Iterable, Union
7
7
 
8
8
  import numpy as np
9
9
 
10
- from smftools.constants import HMM_DIR, LOAD_DIR, LOGGING_DIR, PREPROCESS_DIR, SPATIAL_DIR
10
+ from smftools.constants import LOAD_DIR, LOGGING_DIR
11
11
  from smftools.logging_utils import get_logger, setup_logging
12
12
 
13
13
  from .helpers import AdataPaths
@@ -105,7 +105,7 @@ def load_adata(config_path: str):
105
105
  from datetime import datetime
106
106
  from importlib import resources
107
107
 
108
- from ..readwrite import add_or_update_column_in_csv, make_dirs
108
+ from ..readwrite import make_dirs
109
109
  from .helpers import get_adata_paths, load_experiment_config
110
110
 
111
111
  # -----------------------------
@@ -6,7 +6,13 @@ from typing import Optional, Tuple
6
6
 
7
7
  import anndata as ad
8
8
 
9
- from smftools.constants import LOGGING_DIR, PREPROCESS_DIR
9
+ from smftools.constants import (
10
+ BASE_QUALITY_SCORES,
11
+ DEMUX_TYPE,
12
+ LOGGING_DIR,
13
+ PREPROCESS_DIR,
14
+ READ_SPAN_MASK,
15
+ )
10
16
  from smftools.logging_utils import get_logger, setup_logging
11
17
 
12
18
  logger = get_logger(__name__)
@@ -175,10 +181,6 @@ def preprocess_adata_core(
175
181
  - `pp_adata_path` and `pp_dup_rem_adata_path` are the target output paths for
176
182
  preprocessed and preprocessed+deduplicated AnnData.
177
183
 
178
- Does NOT:
179
- - Decide which stage to load from (that's the wrapper's job).
180
- - Decide whether to skip entirely; it always runs its steps, but individual
181
- sub-steps may skip based on `cfg.bypass_*` or directory existence.
182
184
 
183
185
  Returns
184
186
  -------
@@ -198,12 +200,10 @@ def preprocess_adata_core(
198
200
  from ..plotting import (
199
201
  plot_read_qc_histograms,
200
202
  plot_read_span_quality_clustermaps,
201
- plot_sequence_integer_encoding_clustermaps,
202
203
  )
203
204
  from ..preprocessing import (
204
205
  append_base_context,
205
206
  append_binary_layer_by_base_context,
206
- append_mismatch_frequency_sites,
207
207
  binarize_adata,
208
208
  binarize_on_Youden,
209
209
  calculate_complexity_II,
@@ -476,6 +476,22 @@ def preprocess_adata_core(
476
476
  from_valid_sites_only=True,
477
477
  )
478
478
 
479
+ # -----------------------------
480
+ # Optional inversion along positions axis
481
+ # -----------------------------
482
+ if getattr(cfg, "invert_adata", False):
483
+ adata = invert_adata(adata)
484
+
485
+ # -----------------------------
486
+ # Optional reindexing by reference
487
+ # -----------------------------
488
+ reindex_references_adata(
489
+ adata,
490
+ reference_col=cfg.reference_column,
491
+ offsets=cfg.reindexing_offsets,
492
+ new_col=cfg.reindexed_var_suffix,
493
+ )
494
+
479
495
  ############### Duplicate detection for conversion/deamination SMF ###############
480
496
  if smf_modality != "direct":
481
497
  references = adata.obs[cfg.reference_column].cat.categories
@@ -511,7 +527,7 @@ def preprocess_adata_core(
511
527
  hierarchical_metric="euclidean",
512
528
  hierarchical_window=cfg.duplicate_detection_window_size_for_hamming_neighbors,
513
529
  demux_types=cfg.duplicate_detection_demux_types_to_use,
514
- demux_col="demux_type",
530
+ demux_col=DEMUX_TYPE,
515
531
  )
516
532
 
517
533
  # Use the flagged duplicate read groups and perform complexity analysis
@@ -537,96 +553,19 @@ def preprocess_adata_core(
537
553
  adata_unique = adata
538
554
  ########################################################################################################################
539
555
 
540
- # -----------------------------
541
- # Optional inversion along positions axis
542
- # -----------------------------
543
- if getattr(cfg, "invert_adata", False):
544
- adata = invert_adata(adata)
545
-
546
- # -----------------------------
547
- # Optional reindexing by reference
548
- # -----------------------------
549
- reindex_references_adata(
550
- adata,
551
- reference_col=cfg.reference_column,
552
- offsets=cfg.reindexing_offsets,
553
- new_col=cfg.reindexed_var_suffix,
554
- )
555
-
556
- ############################################### Append mismatch frequency per position ###############################################
557
- append_mismatch_frequency_sites(
558
- adata_unique,
559
- ref_column=cfg.reference_column,
560
- mismatch_layer=cfg.mismatch_frequency_layer,
561
- read_span_layer=cfg.mismatch_frequency_read_span_layer,
562
- mismatch_frequency_range=cfg.mismatch_frequency_range,
563
- bypass=cfg.bypass_append_mismatch_frequency_sites,
564
- force_redo=cfg.force_redo_append_mismatch_frequency_sites,
565
- )
566
-
567
- ############################################### Plot integer sequence encoding clustermaps ###############################################
568
- if "sequence_integer_encoding" not in adata.layers:
569
- logger.debug(
570
- "sequence_integer_encoding layer not found; skipping integer encoding clustermaps."
571
- )
572
- else:
573
- pp_seq_clustermap_dir = preprocess_directory / "06_sequence_integer_encoding_clustermaps"
574
- if pp_seq_clustermap_dir.is_dir() and not cfg.force_redo_preprocessing:
575
- logger.debug(
576
- f"{pp_seq_clustermap_dir} already exists. Skipping sequence integer encoding clustermaps."
577
- )
578
- else:
579
- make_dirs([pp_seq_clustermap_dir])
580
- plot_sequence_integer_encoding_clustermaps(
581
- adata,
582
- sample_col=cfg.sample_name_col_for_plotting,
583
- reference_col=cfg.reference_column,
584
- demux_types=cfg.clustermap_demux_types_to_plot,
585
- min_quality=None,
586
- min_length=None,
587
- min_mapped_length_to_reference_length_ratio=None,
588
- sort_by="none",
589
- max_unknown_fraction=0.5,
590
- save_path=pp_seq_clustermap_dir,
591
- show_position_axis=True,
592
- )
593
-
594
- pp_dedup_seq_clustermap_dir = (
595
- preprocess_directory / "deduplicated" / "06_sequence_integer_encoding_clustermaps"
596
- )
597
- if pp_dedup_seq_clustermap_dir.is_dir() and not cfg.force_redo_preprocessing:
598
- logger.debug(
599
- f"{pp_dedup_seq_clustermap_dir} already exists. Skipping sequence integer encoding clustermaps."
600
- )
601
- else:
602
- make_dirs([pp_dedup_seq_clustermap_dir])
603
- plot_sequence_integer_encoding_clustermaps(
604
- adata_unique,
605
- sample_col=cfg.sample_name_col_for_plotting,
606
- reference_col=cfg.reference_column,
607
- demux_types=cfg.clustermap_demux_types_to_plot,
608
- min_quality=None,
609
- min_length=None,
610
- min_mapped_length_to_reference_length_ratio=None,
611
- sort_by="none",
612
- max_unknown_fraction=0.5,
613
- save_path=pp_dedup_seq_clustermap_dir,
614
- show_position_axis=True,
615
- )
616
-
617
556
  ############################################### Plot read span mask + base quality clustermaps ###############################################
618
557
  quality_layer = None
619
- if "base_quality_scores" in adata.layers:
620
- quality_layer = "base_quality_scores"
558
+ if BASE_QUALITY_SCORES in adata.layers:
559
+ quality_layer = BASE_QUALITY_SCORES
621
560
  elif "base_qualities" in adata.layers:
622
561
  quality_layer = "base_qualities"
623
562
 
624
- if "read_span_mask" not in adata.layers or quality_layer is None:
563
+ if READ_SPAN_MASK not in adata.layers or quality_layer is None:
625
564
  logger.debug(
626
565
  "read_span_mask and base quality layers not found; skipping read span/base quality clustermaps."
627
566
  )
628
567
  else:
629
- pp_span_quality_dir = preprocess_directory / "07_read_span_quality_clustermaps"
568
+ pp_span_quality_dir = preprocess_directory / "06_read_span_and_quality_clustermaps"
630
569
  if pp_span_quality_dir.is_dir() and not cfg.force_redo_preprocessing:
631
570
  logger.debug(
632
571
  f"{pp_span_quality_dir} already exists. Skipping read span/base quality clustermaps."
@@ -638,7 +577,7 @@ def preprocess_adata_core(
638
577
  sample_col=cfg.sample_name_col_for_plotting,
639
578
  reference_col=cfg.reference_column,
640
579
  quality_layer=quality_layer,
641
- read_span_layer="read_span_mask",
580
+ read_span_layer=READ_SPAN_MASK,
642
581
  demux_types=cfg.clustermap_demux_types_to_plot,
643
582
  save_path=pp_span_quality_dir,
644
583
  show_position_axis=True,
@@ -646,20 +585,20 @@ def preprocess_adata_core(
646
585
  )
647
586
 
648
587
  pp_dedup_span_quality_dir = (
649
- preprocess_directory / "deduplicated" / "07_read_span_quality_clustermaps"
588
+ preprocess_directory / "deduplicated" / "06_read_span_and_quality_clustermaps"
650
589
  )
651
590
  if pp_dedup_span_quality_dir.is_dir() and not cfg.force_redo_preprocessing:
652
591
  logger.debug(
653
592
  f"{pp_dedup_span_quality_dir} already exists. Skipping read span/base quality clustermaps."
654
593
  )
655
- elif quality_layer in adata_unique.layers and "read_span_mask" in adata_unique.layers:
594
+ elif quality_layer in adata_unique.layers and READ_SPAN_MASK in adata_unique.layers:
656
595
  make_dirs([pp_dedup_span_quality_dir])
657
596
  plot_read_span_quality_clustermaps(
658
597
  adata_unique,
659
598
  sample_col=cfg.sample_name_col_for_plotting,
660
599
  reference_col=cfg.reference_column,
661
600
  quality_layer=quality_layer,
662
- read_span_layer="read_span_mask",
601
+ read_span_layer=READ_SPAN_MASK,
663
602
  demux_types=cfg.clustermap_demux_types_to_plot,
664
603
  save_path=pp_dedup_span_quality_dir,
665
604
  show_position_axis=True,
@@ -0,0 +1,26 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ from typing import Optional, Tuple
5
+
6
+ import anndata as ad
7
+
8
+ from ..cli.chimeric_adata import chimeric_adata
9
+ from ..cli.hmm_adata import hmm_adata
10
+ from ..cli.latent_adata import latent_adata
11
+ from ..cli.load_adata import load_adata
12
+ from ..cli.preprocess_adata import preprocess_adata
13
+ from ..cli.spatial_adata import spatial_adata
14
+ from ..cli.variant_adata import variant_adata
15
+
16
+
17
+ def full_flow(
18
+ config_path: str,
19
+ ) -> Tuple[Optional[ad.AnnData], Optional[Path]]:
20
+ load_adata(config_path)
21
+ preprocess_adata(config_path)
22
+ spatial_adata(config_path)
23
+ variant_adata(config_path)
24
+ chimeric_adata(config_path)
25
+ hmm_adata(config_path)
26
+ latent_adata(config_path)
@@ -6,9 +6,8 @@ from typing import Optional, Tuple
6
6
 
7
7
  import anndata as ad
8
8
 
9
- from smftools.constants import LOGGING_DIR, SEQUENCE_INTEGER_ENCODING, SPATIAL_DIR
9
+ from smftools.constants import LOGGING_DIR, SPATIAL_DIR
10
10
  from smftools.logging_utils import get_logger, setup_logging
11
- from smftools.optional_imports import require
12
11
 
13
12
  logger = get_logger(__name__)
14
13
 
@@ -36,7 +35,7 @@ def spatial_adata(
36
35
  spatial_adata_path : Path | None
37
36
  Path to the “current” spatial AnnData (or hmm AnnData if we skip to that).
38
37
  """
39
- from ..readwrite import add_or_update_column_in_csv, safe_read_h5ad
38
+ from ..readwrite import safe_read_h5ad
40
39
  from .helpers import get_adata_paths, load_experiment_config
41
40
 
42
41
  # 1) Ensure config + basic paths via load_adata
@@ -47,7 +46,10 @@ def spatial_adata(
47
46
  pp_path = paths.pp
48
47
  pp_dedup_path = paths.pp_dedup
49
48
  spatial_path = paths.spatial
49
+ chimeric_path = paths.chimeric
50
+ variant_path = paths.variant
50
51
  hmm_path = paths.hmm
52
+ latent_path = paths.latent
51
53
 
52
54
  # Stage-skipping logic for spatial
53
55
  if not getattr(cfg, "force_redo_spatial_analyses", False):
@@ -65,9 +67,18 @@ def spatial_adata(
65
67
  if hmm_path.exists():
66
68
  start_adata = _load(hmm_path)
67
69
  source_path = hmm_path
70
+ elif latent_path.exists():
71
+ start_adata = _load(latent_path)
72
+ source_path = latent_path
68
73
  elif spatial_path.exists():
69
74
  start_adata = _load(spatial_path)
70
75
  source_path = spatial_path
76
+ elif chimeric_path.exists():
77
+ start_adata = _load(chimeric_path)
78
+ source_path = chimeric_path
79
+ elif variant_path.exists():
80
+ start_adata = _load(variant_path)
81
+ source_path = variant_path
71
82
  elif pp_dedup_path.exists():
72
83
  start_adata = _load(pp_dedup_path)
73
84
  source_path = pp_dedup_path
@@ -140,7 +151,6 @@ def spatial_adata_core(
140
151
  from ..plotting import (
141
152
  combined_raw_clustermap,
142
153
  plot_rolling_grid,
143
- plot_rolling_nn_and_layer,
144
154
  plot_spatial_autocorr_grid,
145
155
  )
146
156
  from ..preprocessing import (
@@ -149,12 +159,10 @@ def spatial_adata_core(
149
159
  reindex_references_adata,
150
160
  )
151
161
  from ..readwrite import make_dirs, safe_read_h5ad
152
- from ..tools import rolling_window_nn_distance
153
162
  from ..tools.position_stats import (
154
163
  compute_positionwise_statistics,
155
164
  plot_positionwise_matrices,
156
165
  )
157
- from ..tools.rolling_nn_distance import assign_rolling_nn_results
158
166
  from ..tools.spatial_autocorrelation import (
159
167
  analyze_autocorr_matrix,
160
168
  binary_autocorrelation_with_spacing,
@@ -227,13 +235,13 @@ def spatial_adata_core(
227
235
  references = adata.obs[cfg.reference_column].cat.categories
228
236
 
229
237
  # ============================================================
230
- # 1) Clustermaps (non-direct modalities) on *preprocessed* data
238
+ # 1) Clustermaps (non-direct modalities) on preprocessed adata
231
239
  # ============================================================
232
240
  if smf_modality != "direct":
233
241
  preprocessed_version_available = pp_adata_path.exists()
234
242
 
235
243
  if preprocessed_version_available:
236
- pp_clustermap_dir = spatial_directory / "06_clustermaps"
244
+ pp_clustermap_dir = spatial_directory / "01_clustermaps"
237
245
 
238
246
  if pp_clustermap_dir.is_dir() and not getattr(
239
247
  cfg, "force_redo_spatial_analyses", False
@@ -292,7 +300,7 @@ def spatial_adata_core(
292
300
  min_mapped_length_to_reference_length_ratio=cfg.read_len_to_ref_ratio_filter_thresholds[
293
301
  0
294
302
  ],
295
- min_position_valid_fraction=cfg.min_valid_fraction_positions_in_read_vs_ref,
303
+ min_position_valid_fraction=1 - cfg.position_max_nan_threshold,
296
304
  demux_types=cfg.clustermap_demux_types_to_plot,
297
305
  bins=None,
298
306
  sample_mapping=None,
@@ -303,10 +311,10 @@ def spatial_adata_core(
303
311
  )
304
312
 
305
313
  # ============================================================
306
- # 2) Clustermaps on *deduplicated* preprocessed AnnData
314
+ # 2) Clustermaps on deduplicated preprocessed AnnDatas
307
315
  # ============================================================
308
316
  spatial_dir_dedup = spatial_directory / "deduplicated"
309
- clustermap_dir_dedup = spatial_dir_dedup / "06_clustermaps"
317
+ clustermap_dir_dedup = spatial_dir_dedup / "01_clustermaps"
310
318
 
311
319
  # Clustermaps on deduplicated adata
312
320
  if clustermap_dir_dedup.is_dir() and not getattr(cfg, "force_redo_spatial_analyses", False):
@@ -343,104 +351,10 @@ def spatial_adata_core(
343
351
  index_col_suffix=reindex_suffix,
344
352
  )
345
353
 
346
- # ============================================================
347
- # 2b) Rolling NN distances + layer clustermaps
348
- # ============================================================
349
- pp_rolling_nn_dir = spatial_dir_dedup / "06b_rolling_nn_clustermaps"
350
-
351
- if pp_rolling_nn_dir.is_dir() and not getattr(cfg, "force_redo_spatial_analyses", False):
352
- logger.debug(f"{pp_rolling_nn_dir} already exists. Skipping rolling NN distance plots.")
353
- else:
354
- make_dirs([pp_rolling_nn_dir])
355
- samples = (
356
- adata.obs[cfg.sample_name_col_for_plotting].astype("category").cat.categories.tolist()
357
- )
358
- references = adata.obs[cfg.reference_column].astype("category").cat.categories.tolist()
359
-
360
- for reference in references:
361
- for sample in samples:
362
- mask = (adata.obs[cfg.sample_name_col_for_plotting] == sample) & (
363
- adata.obs[cfg.reference_column] == reference
364
- )
365
- if not mask.any():
366
- continue
367
-
368
- subset = adata[mask]
369
- site_mask = (
370
- adata.var[[f"{reference}_{st}_site" for st in cfg.rolling_nn_site_types]]
371
- .fillna(False)
372
- .any(axis=1)
373
- )
374
- subset = subset[:, site_mask].copy()
375
- try:
376
- rolling_values, rolling_starts = rolling_window_nn_distance(
377
- subset,
378
- layer=cfg.rolling_nn_layer,
379
- window=cfg.rolling_nn_window,
380
- step=cfg.rolling_nn_step,
381
- min_overlap=cfg.rolling_nn_min_overlap,
382
- return_fraction=cfg.rolling_nn_return_fraction,
383
- store_obsm=cfg.rolling_nn_obsm_key,
384
- )
385
- except Exception as exc:
386
- logger.warning(
387
- "Rolling NN distance computation failed for sample=%s ref=%s: %s",
388
- sample,
389
- reference,
390
- exc,
391
- )
392
- continue
393
-
394
- safe_sample = str(sample).replace(os.sep, "_")
395
- safe_ref = str(reference).replace(os.sep, "_")
396
- parent_obsm_key = f"{cfg.rolling_nn_obsm_key}__{safe_ref}"
397
- try:
398
- assign_rolling_nn_results(
399
- adata,
400
- subset,
401
- rolling_values,
402
- rolling_starts,
403
- obsm_key=parent_obsm_key,
404
- window=cfg.rolling_nn_window,
405
- step=cfg.rolling_nn_step,
406
- min_overlap=cfg.rolling_nn_min_overlap,
407
- return_fraction=cfg.rolling_nn_return_fraction,
408
- layer=cfg.rolling_nn_layer,
409
- )
410
- except Exception as exc:
411
- logger.warning(
412
- "Failed to merge rolling NN results for sample=%s ref=%s: %s",
413
- sample,
414
- reference,
415
- exc,
416
- )
417
- adata.uns.setdefault(f"{cfg.rolling_nn_obsm_key}_reference_map", {})[reference] = (
418
- parent_obsm_key
419
- )
420
- out_png = pp_rolling_nn_dir / f"{safe_sample}__{safe_ref}.png"
421
- title = f"{sample} {reference}"
422
- try:
423
- plot_rolling_nn_and_layer(
424
- subset,
425
- obsm_key=cfg.rolling_nn_obsm_key,
426
- layer_key=cfg.rolling_nn_plot_layer,
427
- max_nan_fraction=cfg.position_max_nan_threshold,
428
- var_valid_fraction_col=f"{reference}_valid_fraction",
429
- title=title,
430
- save_name=out_png,
431
- )
432
- except Exception as exc:
433
- logger.warning(
434
- "Failed rolling NN plot for sample=%s ref=%s: %s",
435
- sample,
436
- reference,
437
- exc,
438
- )
439
-
440
354
  # ============================================================
441
355
  # 3) Spatial autocorrelation + rolling metrics
442
356
  # ============================================================
443
- pp_autocorr_dir = spatial_dir_dedup / "08_autocorrelations"
357
+ pp_autocorr_dir = spatial_dir_dedup / "02_autocorrelations"
444
358
 
445
359
  if pp_autocorr_dir.is_dir() and not getattr(cfg, "force_redo_spatial_analyses", False):
446
360
  logger.debug(f"{pp_autocorr_dir} already exists. Skipping autocorrelation plotting.")
@@ -783,7 +697,7 @@ def spatial_adata_core(
783
697
  # ============================================================
784
698
  # 4) Pearson / correlation matrices
785
699
  # ============================================================
786
- corr_dir = spatial_dir_dedup / "09_correlation_matrices"
700
+ corr_dir = spatial_dir_dedup / "03_correlation_matrices"
787
701
 
788
702
  if corr_dir.is_dir() and not getattr(cfg, "force_redo_spatial_analyses", False):
789
703
  logger.debug(f"{corr_dir} already exists. Skipping correlation matrix plotting.")
@@ -816,10 +730,10 @@ def spatial_adata_core(
816
730
  )
817
731
 
818
732
  # ============================================================
819
- # 5) Save spatial AnnData
733
+ # 4) Save spatial AnnData
820
734
  # ============================================================
821
735
  if (not spatial_adata_path.exists()) or getattr(cfg, "force_redo_spatial_analyses", False):
822
- logger.info("Saving spatial analyzed AnnData (post preprocessing and duplicate removal).")
736
+ logger.info("Saving spatial analyzed AnnData.")
823
737
  record_smftools_metadata(
824
738
  adata,
825
739
  step_name="spatial",