smftools-0.2.4-py3-none-any.whl → smftools-0.2.5-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133)
  1. smftools/__init__.py +6 -8
  2. smftools/_settings.py +4 -6
  3. smftools/_version.py +1 -1
  4. smftools/cli/helpers.py +7 -1
  5. smftools/cli/hmm_adata.py +902 -244
  6. smftools/cli/load_adata.py +318 -198
  7. smftools/cli/preprocess_adata.py +285 -171
  8. smftools/cli/spatial_adata.py +137 -53
  9. smftools/cli_entry.py +94 -178
  10. smftools/config/__init__.py +1 -1
  11. smftools/config/conversion.yaml +5 -1
  12. smftools/config/deaminase.yaml +1 -1
  13. smftools/config/default.yaml +22 -17
  14. smftools/config/direct.yaml +8 -3
  15. smftools/config/discover_input_files.py +19 -5
  16. smftools/config/experiment_config.py +505 -276
  17. smftools/constants.py +37 -0
  18. smftools/datasets/__init__.py +2 -8
  19. smftools/datasets/datasets.py +32 -18
  20. smftools/hmm/HMM.py +2125 -1426
  21. smftools/hmm/__init__.py +2 -3
  22. smftools/hmm/archived/call_hmm_peaks.py +16 -1
  23. smftools/hmm/call_hmm_peaks.py +173 -193
  24. smftools/hmm/display_hmm.py +19 -6
  25. smftools/hmm/hmm_readwrite.py +13 -4
  26. smftools/hmm/nucleosome_hmm_refinement.py +102 -14
  27. smftools/informatics/__init__.py +30 -7
  28. smftools/informatics/archived/helpers/archived/align_and_sort_BAM.py +14 -1
  29. smftools/informatics/archived/helpers/archived/bam_qc.py +14 -1
  30. smftools/informatics/archived/helpers/archived/concatenate_fastqs_to_bam.py +8 -1
  31. smftools/informatics/archived/helpers/archived/load_adata.py +3 -3
  32. smftools/informatics/archived/helpers/archived/plot_bed_histograms.py +3 -1
  33. smftools/informatics/archived/print_bam_query_seq.py +7 -1
  34. smftools/informatics/bam_functions.py +379 -156
  35. smftools/informatics/basecalling.py +51 -9
  36. smftools/informatics/bed_functions.py +90 -57
  37. smftools/informatics/binarize_converted_base_identities.py +18 -7
  38. smftools/informatics/complement_base_list.py +7 -6
  39. smftools/informatics/converted_BAM_to_adata.py +265 -122
  40. smftools/informatics/fasta_functions.py +161 -83
  41. smftools/informatics/h5ad_functions.py +195 -29
  42. smftools/informatics/modkit_extract_to_adata.py +609 -270
  43. smftools/informatics/modkit_functions.py +85 -44
  44. smftools/informatics/ohe.py +44 -21
  45. smftools/informatics/pod5_functions.py +112 -73
  46. smftools/informatics/run_multiqc.py +20 -14
  47. smftools/logging_utils.py +51 -0
  48. smftools/machine_learning/__init__.py +2 -7
  49. smftools/machine_learning/data/anndata_data_module.py +143 -50
  50. smftools/machine_learning/data/preprocessing.py +2 -1
  51. smftools/machine_learning/evaluation/__init__.py +1 -1
  52. smftools/machine_learning/evaluation/eval_utils.py +11 -14
  53. smftools/machine_learning/evaluation/evaluators.py +46 -33
  54. smftools/machine_learning/inference/__init__.py +1 -1
  55. smftools/machine_learning/inference/inference_utils.py +7 -4
  56. smftools/machine_learning/inference/lightning_inference.py +9 -13
  57. smftools/machine_learning/inference/sklearn_inference.py +6 -8
  58. smftools/machine_learning/inference/sliding_window_inference.py +35 -25
  59. smftools/machine_learning/models/__init__.py +10 -5
  60. smftools/machine_learning/models/base.py +28 -42
  61. smftools/machine_learning/models/cnn.py +15 -11
  62. smftools/machine_learning/models/lightning_base.py +71 -40
  63. smftools/machine_learning/models/mlp.py +13 -4
  64. smftools/machine_learning/models/positional.py +3 -2
  65. smftools/machine_learning/models/rnn.py +3 -2
  66. smftools/machine_learning/models/sklearn_models.py +39 -22
  67. smftools/machine_learning/models/transformer.py +68 -53
  68. smftools/machine_learning/models/wrappers.py +2 -1
  69. smftools/machine_learning/training/__init__.py +2 -2
  70. smftools/machine_learning/training/train_lightning_model.py +29 -20
  71. smftools/machine_learning/training/train_sklearn_model.py +9 -15
  72. smftools/machine_learning/utils/__init__.py +1 -1
  73. smftools/machine_learning/utils/device.py +7 -4
  74. smftools/machine_learning/utils/grl.py +3 -1
  75. smftools/metadata.py +443 -0
  76. smftools/plotting/__init__.py +19 -5
  77. smftools/plotting/autocorrelation_plotting.py +145 -44
  78. smftools/plotting/classifiers.py +162 -72
  79. smftools/plotting/general_plotting.py +347 -168
  80. smftools/plotting/hmm_plotting.py +42 -13
  81. smftools/plotting/position_stats.py +145 -85
  82. smftools/plotting/qc_plotting.py +20 -12
  83. smftools/preprocessing/__init__.py +8 -8
  84. smftools/preprocessing/append_base_context.py +105 -79
  85. smftools/preprocessing/append_binary_layer_by_base_context.py +75 -37
  86. smftools/preprocessing/{archives → archived}/calculate_complexity.py +3 -1
  87. smftools/preprocessing/{archives → archived}/preprocessing.py +8 -6
  88. smftools/preprocessing/binarize.py +21 -4
  89. smftools/preprocessing/binarize_on_Youden.py +127 -31
  90. smftools/preprocessing/binary_layers_to_ohe.py +17 -11
  91. smftools/preprocessing/calculate_complexity_II.py +86 -59
  92. smftools/preprocessing/calculate_consensus.py +28 -19
  93. smftools/preprocessing/calculate_coverage.py +44 -22
  94. smftools/preprocessing/calculate_pairwise_differences.py +2 -1
  95. smftools/preprocessing/calculate_pairwise_hamming_distances.py +4 -3
  96. smftools/preprocessing/calculate_position_Youden.py +103 -55
  97. smftools/preprocessing/calculate_read_length_stats.py +52 -23
  98. smftools/preprocessing/calculate_read_modification_stats.py +91 -57
  99. smftools/preprocessing/clean_NaN.py +38 -28
  100. smftools/preprocessing/filter_adata_by_nan_proportion.py +24 -12
  101. smftools/preprocessing/filter_reads_on_length_quality_mapping.py +70 -37
  102. smftools/preprocessing/filter_reads_on_modification_thresholds.py +181 -73
  103. smftools/preprocessing/flag_duplicate_reads.py +688 -271
  104. smftools/preprocessing/invert_adata.py +26 -11
  105. smftools/preprocessing/load_sample_sheet.py +40 -22
  106. smftools/preprocessing/make_dirs.py +8 -3
  107. smftools/preprocessing/min_non_diagonal.py +2 -1
  108. smftools/preprocessing/recipes.py +56 -23
  109. smftools/preprocessing/reindex_references_adata.py +93 -27
  110. smftools/preprocessing/subsample_adata.py +33 -16
  111. smftools/readwrite.py +264 -109
  112. smftools/schema/__init__.py +11 -0
  113. smftools/schema/anndata_schema_v1.yaml +227 -0
  114. smftools/tools/__init__.py +3 -4
  115. smftools/tools/archived/classifiers.py +163 -0
  116. smftools/tools/archived/subset_adata_v1.py +10 -1
  117. smftools/tools/archived/subset_adata_v2.py +12 -1
  118. smftools/tools/calculate_umap.py +54 -15
  119. smftools/tools/cluster_adata_on_methylation.py +115 -46
  120. smftools/tools/general_tools.py +70 -25
  121. smftools/tools/position_stats.py +229 -98
  122. smftools/tools/read_stats.py +50 -29
  123. smftools/tools/spatial_autocorrelation.py +365 -192
  124. smftools/tools/subset_adata.py +23 -21
  125. {smftools-0.2.4.dist-info → smftools-0.2.5.dist-info}/METADATA +15 -43
  126. smftools-0.2.5.dist-info/RECORD +181 -0
  127. smftools-0.2.4.dist-info/RECORD +0 -176
  128. /smftools/preprocessing/{archives → archived}/add_read_length_and_mapping_qc.py +0 -0
  129. /smftools/preprocessing/{archives → archived}/mark_duplicates.py +0 -0
  130. /smftools/preprocessing/{archives → archived}/remove_duplicates.py +0 -0
  131. {smftools-0.2.4.dist-info → smftools-0.2.5.dist-info}/WHEEL +0 -0
  132. {smftools-0.2.4.dist-info → smftools-0.2.5.dist-info}/entry_points.txt +0 -0
  133. {smftools-0.2.4.dist-info → smftools-0.2.5.dist-info}/licenses/LICENSE +0 -0
@@ -3,6 +3,11 @@ from typing import Optional, Tuple
3
3
 
4
4
  import anndata as ad
5
5
 
6
+ from smftools.logging_utils import get_logger
7
+
8
+ logger = get_logger(__name__)
9
+
10
+
6
11
  def spatial_adata(
7
12
  config_path: str,
8
13
  ) -> Tuple[Optional[ad.AnnData], Optional[Path]]:
@@ -26,10 +31,10 @@ def spatial_adata(
26
31
  spatial_adata_path : Path | None
27
32
  Path to the “current” spatial AnnData (or hmm AnnData if we skip to that).
28
33
  """
29
- from ..readwrite import safe_read_h5ad, make_dirs, add_or_update_column_in_csv
34
+ from ..readwrite import add_or_update_column_in_csv, safe_read_h5ad
35
+ from .helpers import get_adata_paths
30
36
  from .load_adata import load_adata
31
37
  from .preprocess_adata import preprocess_adata
32
- from .helpers import get_adata_paths
33
38
 
34
39
  # 1) Ensure config + basic paths via load_adata
35
40
  loaded_adata, loaded_path, cfg = load_adata(config_path)
@@ -45,21 +50,22 @@ def spatial_adata(
45
50
  if not getattr(cfg, "force_redo_spatial_analyses", False):
46
51
  # If HMM exists, it's the most processed stage — reuse it.
47
52
  if hmm_path.exists():
48
- print(f"HMM AnnData found: {hmm_path}\nSkipping smftools spatial")
53
+ logger.info(f"HMM AnnData found: {hmm_path}\nSkipping smftools spatial")
49
54
  return None, hmm_path
50
55
 
51
56
  # If spatial exists, we consider spatial analyses already done.
52
57
  if spatial_path.exists():
53
- print(f"Spatial AnnData found: {spatial_path}\nSkipping smftools spatial")
58
+ logger.info(f"Spatial AnnData found: {spatial_path}\nSkipping smftools spatial")
54
59
  return None, spatial_path
55
60
 
56
61
  # 2) Ensure preprocessing has been run
57
62
  # This will create pp/pp_dedup as needed or return them if they already exist.
58
- pp_adata, pp_adata_path_ret, pp_dedup_adata, pp_dedup_adata_path_ret = preprocess_adata(config_path)
63
+ pp_adata, pp_adata_path_ret, pp_dedup_adata, pp_dedup_adata_path_ret = preprocess_adata(
64
+ config_path
65
+ )
59
66
 
60
67
  # Helper to load from disk, reusing loaded_adata if it matches
61
68
  def _load(path: Path):
62
- from ..readwrite import safe_read_h5ad
63
69
  if loaded_adata is not None and loaded_path == path:
64
70
  return loaded_adata
65
71
  adata, _ = safe_read_h5ad(path)
@@ -69,15 +75,19 @@ def spatial_adata(
69
75
  # Prefer in-memory pp_dedup_adata when preprocess_adata just ran.
70
76
  if pp_dedup_adata is not None:
71
77
  start_adata = pp_dedup_adata
78
+ source_path = pp_dedup_adata_path_ret
72
79
  else:
73
80
  if pp_dedup_path.exists():
74
81
  start_adata = _load(pp_dedup_path)
82
+ source_path = pp_dedup_path
75
83
  elif pp_path.exists():
76
84
  start_adata = _load(pp_path)
85
+ source_path = pp_path
77
86
  elif raw_path.exists():
78
87
  start_adata = _load(raw_path)
88
+ source_path = raw_path
79
89
  else:
80
- print("No suitable AnnData found for spatial analyses (need at least raw).")
90
+ logger.warning("No suitable AnnData found for spatial analyses (need at least raw).")
81
91
  return None, None
82
92
 
83
93
  # 4) Run the spatial core
@@ -88,6 +98,8 @@ def spatial_adata(
88
98
  pp_adata_path=pp_path,
89
99
  pp_dup_rem_adata_path=pp_dedup_path,
90
100
  pp_adata_in_memory=pp_adata,
101
+ source_adata_path=source_path,
102
+ config_path=config_path,
91
103
  )
92
104
 
93
105
  # 5) Register spatial path in summary CSV
@@ -103,6 +115,8 @@ def spatial_adata_core(
103
115
  pp_adata_path: Path,
104
116
  pp_dup_rem_adata_path: Path,
105
117
  pp_adata_in_memory: Optional[ad.AnnData] = None,
118
+ source_adata_path: Optional[Path] = None,
119
+ config_path: Optional[str] = None,
106
120
  ) -> Tuple[ad.AnnData, Path]:
107
121
  """
108
122
  Core spatial analysis pipeline.
@@ -141,30 +155,30 @@ def spatial_adata_core(
141
155
  import pandas as pd
142
156
  import scanpy as sc
143
157
 
144
- from ..readwrite import make_dirs, safe_read_h5ad
145
- from .helpers import write_gz_h5ad
146
-
147
- from ..preprocessing import (
148
- load_sample_sheet,
149
- invert_adata,
150
- reindex_references_adata,
151
- )
158
+ from ..metadata import record_smftools_metadata
152
159
  from ..plotting import (
153
160
  combined_raw_clustermap,
154
161
  plot_rolling_grid,
155
162
  plot_spatial_autocorr_grid,
156
163
  )
164
+ from ..preprocessing import (
165
+ invert_adata,
166
+ load_sample_sheet,
167
+ reindex_references_adata,
168
+ )
169
+ from ..readwrite import make_dirs, safe_read_h5ad
157
170
  from ..tools import calculate_umap
171
+ from ..tools.position_stats import (
172
+ compute_positionwise_statistics,
173
+ plot_positionwise_matrices,
174
+ )
158
175
  from ..tools.spatial_autocorrelation import (
159
- binary_autocorrelation_with_spacing,
160
176
  analyze_autocorr_matrix,
177
+ binary_autocorrelation_with_spacing,
161
178
  bootstrap_periodicity,
162
179
  rolling_autocorr_metrics,
163
180
  )
164
- from ..tools.position_stats import (
165
- compute_positionwise_statistics,
166
- plot_positionwise_matrices,
167
- )
181
+ from .helpers import write_gz_h5ad
168
182
 
169
183
  # -----------------------------
170
184
  # General setup
@@ -207,7 +221,12 @@ def spatial_adata_core(
207
221
  offsets=cfg.reindexing_offsets,
208
222
  new_col=cfg.reindexed_var_suffix,
209
223
  )
210
-
224
+
225
+ if adata.uns.get("reindex_references_adata_performed", False):
226
+ reindex_suffix = cfg.reindexed_var_suffix
227
+ else:
228
+ reindex_suffix = None
229
+
211
230
  pp_dir = output_directory / "preprocessed"
212
231
  references = adata.obs[cfg.reference_column].cat.categories
213
232
 
@@ -223,7 +242,9 @@ def spatial_adata_core(
223
242
  if pp_clustermap_dir.is_dir() and not getattr(
224
243
  cfg, "force_redo_spatial_analyses", False
225
244
  ):
226
- print(f"{pp_clustermap_dir} already exists. Skipping clustermap plotting for preprocessed AnnData.")
245
+ logger.debug(
246
+ f"{pp_clustermap_dir} already exists. Skipping clustermap plotting for preprocessed AnnData."
247
+ )
227
248
  else:
228
249
  make_dirs([pp_dir, pp_clustermap_dir])
229
250
 
@@ -232,6 +253,34 @@ def spatial_adata_core(
232
253
  else:
233
254
  pp_adata, _ = safe_read_h5ad(pp_adata_path)
234
255
 
256
+ # -----------------------------
257
+ # Optional sample sheet metadata
258
+ # -----------------------------
259
+ if getattr(cfg, "sample_sheet_path", None):
260
+ load_sample_sheet(
261
+ pp_adata,
262
+ cfg.sample_sheet_path,
263
+ mapping_key_column=cfg.sample_sheet_mapping_column,
264
+ as_category=True,
265
+ force_reload=cfg.force_reload_sample_sheet,
266
+ )
267
+
268
+ # -----------------------------
269
+ # Optional inversion along positions axis
270
+ # -----------------------------
271
+ if getattr(cfg, "invert_adata", False):
272
+ pp_adata = invert_adata(pp_adata)
273
+
274
+ # -----------------------------
275
+ # Optional reindexing by reference
276
+ # -----------------------------
277
+ reindex_references_adata(
278
+ pp_adata,
279
+ reference_col=cfg.reference_column,
280
+ offsets=cfg.reindexing_offsets,
281
+ new_col=cfg.reindexed_var_suffix,
282
+ )
283
+
235
284
  combined_raw_clustermap(
236
285
  pp_adata,
237
286
  sample_col=cfg.sample_name_col_for_plotting,
@@ -247,16 +296,19 @@ def spatial_adata_core(
247
296
  cmap_a=cfg.clustermap_cmap_a,
248
297
  min_quality=cfg.read_quality_filter_thresholds[0],
249
298
  min_length=cfg.read_len_filter_thresholds[0],
250
- min_mapped_length_to_reference_length_ratio=cfg.read_len_to_ref_ratio_filter_thresholds[0],
299
+ min_mapped_length_to_reference_length_ratio=cfg.read_len_to_ref_ratio_filter_thresholds[
300
+ 0
301
+ ],
251
302
  min_position_valid_fraction=cfg.min_valid_fraction_positions_in_read_vs_ref,
303
+ demux_types=("double", "already"),
252
304
  bins=None,
253
305
  sample_mapping=None,
254
306
  save_path=pp_clustermap_dir,
255
307
  sort_by=cfg.spatial_clustermap_sortby,
256
308
  deaminase=deaminase,
257
- index_col_suffix=cfg.reindexed_var_suffix,
309
+ index_col_suffix=reindex_suffix,
258
310
  )
259
-
311
+
260
312
  # ============================================================
261
313
  # 2) Clustermaps + UMAP on *deduplicated* preprocessed AnnData
262
314
  # ============================================================
@@ -265,10 +317,10 @@ def spatial_adata_core(
265
317
  pp_umap_dir = pp_dir_dedup / "07_umaps"
266
318
 
267
319
  # Clustermaps on deduplicated adata
268
- if pp_clustermap_dir_dedup.is_dir() and not getattr(
269
- cfg, "force_redo_spatial_analyses", False
270
- ):
271
- print(f"{pp_clustermap_dir_dedup} already exists. Skipping clustermap plotting for deduplicated AnnData.")
320
+ if pp_clustermap_dir_dedup.is_dir() and not getattr(cfg, "force_redo_spatial_analyses", False):
321
+ logger.debug(
322
+ f"{pp_clustermap_dir_dedup} already exists. Skipping clustermap plotting for deduplicated AnnData."
323
+ )
272
324
  else:
273
325
  make_dirs([pp_dir_dedup, pp_clustermap_dir_dedup])
274
326
  combined_raw_clustermap(
@@ -286,19 +338,22 @@ def spatial_adata_core(
286
338
  cmap_a=cfg.clustermap_cmap_a,
287
339
  min_quality=cfg.read_quality_filter_thresholds[0],
288
340
  min_length=cfg.read_len_filter_thresholds[0],
289
- min_mapped_length_to_reference_length_ratio=cfg.read_len_to_ref_ratio_filter_thresholds[0],
341
+ min_mapped_length_to_reference_length_ratio=cfg.read_len_to_ref_ratio_filter_thresholds[
342
+ 0
343
+ ],
290
344
  min_position_valid_fraction=1 - cfg.position_max_nan_threshold,
345
+ demux_types=("double", "already"),
291
346
  bins=None,
292
347
  sample_mapping=None,
293
348
  save_path=pp_clustermap_dir_dedup,
294
349
  sort_by=cfg.spatial_clustermap_sortby,
295
350
  deaminase=deaminase,
296
- index_col_suffix=cfg.reindexed_var_suffix,
351
+ index_col_suffix=reindex_suffix,
297
352
  )
298
353
 
299
354
  # UMAP / Leiden
300
355
  if pp_umap_dir.is_dir() and not getattr(cfg, "force_redo_spatial_analyses", False):
301
- print(f"{pp_umap_dir} already exists. Skipping UMAP plotting.")
356
+ logger.debug(f"{pp_umap_dir} already exists. Skipping UMAP plotting.")
302
357
  else:
303
358
  make_dirs([pp_umap_dir])
304
359
 
@@ -336,40 +391,48 @@ def spatial_adata_core(
336
391
  pp_autocorr_dir = pp_dir_dedup / "08_autocorrelations"
337
392
 
338
393
  if pp_autocorr_dir.is_dir() and not getattr(cfg, "force_redo_spatial_analyses", False):
339
- print(f"{pp_autocorr_dir} already exists. Skipping autocorrelation plotting.")
394
+ logger.debug(f"{pp_autocorr_dir} already exists. Skipping autocorrelation plotting.")
340
395
  else:
341
396
  positions = adata.var_names.astype(int).values
342
397
  lags = np.arange(cfg.autocorr_max_lag + 1)
343
398
 
344
399
  try:
345
400
  from joblib import Parallel, delayed
401
+
346
402
  _have_joblib = True
347
403
  except Exception:
348
404
  _have_joblib = False
349
405
 
350
- samples = adata.obs[cfg.sample_name_col_for_plotting].astype("category").cat.categories.tolist()
406
+ samples = (
407
+ adata.obs[cfg.sample_name_col_for_plotting].astype("category").cat.categories.tolist()
408
+ )
351
409
  ref_col = getattr(cfg, "reference_strand_col", "Reference_strand")
352
410
  refs = adata.obs[ref_col].astype("category").cat.categories.tolist()
353
411
 
354
412
  for site_type in cfg.autocorr_site_types:
355
413
  layer_key = f"{site_type}_site_binary"
356
414
  if layer_key not in adata.layers:
357
- print(f"Layer {layer_key} not found in adata.layers — skipping {site_type}.")
415
+ logger.debug(f"Layer {layer_key} not found in adata.layers — skipping {site_type}.")
358
416
  continue
359
417
 
360
418
  X = adata.layers[layer_key]
361
419
  if getattr(X, "shape", (0,))[0] == 0:
362
- print(f"Layer {layer_key} empty — skipping {site_type}.")
420
+ logger.debug(f"Layer {layer_key} empty — skipping {site_type}.")
363
421
  continue
364
422
 
365
423
  rows = []
366
424
  counts = []
367
425
 
368
426
  if _have_joblib:
427
+
369
428
  def _worker(row):
370
429
  try:
371
430
  ac, cnts = binary_autocorrelation_with_spacing(
372
- row, positions, max_lag=cfg.autocorr_max_lag, return_counts=True
431
+ row,
432
+ positions,
433
+ max_lag=cfg.autocorr_max_lag,
434
+ return_counts=True,
435
+ normalize=cfg.autocorr_normalization_method,
373
436
  )
374
437
  except Exception:
375
438
  ac = np.full(cfg.autocorr_max_lag + 1, np.nan, dtype=np.float32)
@@ -385,7 +448,11 @@ def spatial_adata_core(
385
448
  else:
386
449
  for i in range(X.shape[0]):
387
450
  ac, cnts = binary_autocorrelation_with_spacing(
388
- X[i], positions, max_lag=cfg.autocorr_max_lag, return_counts=True
451
+ X[i],
452
+ positions,
453
+ max_lag=cfg.autocorr_max_lag,
454
+ return_counts=True,
455
+ normalize=cfg.autocorr_normalization_method,
389
456
  )
390
457
  rows.append(ac)
391
458
  counts.append(cnts)
@@ -474,7 +541,9 @@ def spatial_adata_core(
474
541
  try:
475
542
  r = analyze_autocorr_matrix(
476
543
  ac_sel,
477
- cnt_sel if cnt_sel is not None else np.zeros_like(ac_sel, dtype=int),
544
+ cnt_sel
545
+ if cnt_sel is not None
546
+ else np.zeros_like(ac_sel, dtype=int),
478
547
  lags,
479
548
  nrl_search_bp=(120, 260),
480
549
  pad_factor=4,
@@ -489,7 +558,9 @@ def spatial_adata_core(
489
558
 
490
559
  adata.uns[f"{site_type}_spatial_periodicity_metrics_by_group"] = metrics_by_group
491
560
 
492
- global_nrl = adata.uns.get(f"{site_type}_spatial_periodicity_metrics", {}).get("nrl_bp", None)
561
+ global_nrl = adata.uns.get(f"{site_type}_spatial_periodicity_metrics", {}).get(
562
+ "nrl_bp", None
563
+ )
493
564
 
494
565
  rolling_cfg = {
495
566
  "window_size": getattr(
@@ -554,27 +625,31 @@ def spatial_adata_core(
554
625
  fixed_nrl_bp=global_nrl,
555
626
  )
556
627
  except Exception as e:
557
- warnings.warn(
628
+ logger.warning(
558
629
  f"rolling_autocorr_metrics failed for {site_type} "
559
630
  f"{sample_name} {ref_label}: {e}"
560
631
  )
561
632
  continue
562
633
 
563
634
  if "center" not in df_roll.columns:
564
- warnings.warn(
635
+ logger.warning(
565
636
  f"rolling_autocorr_metrics returned unexpected schema "
566
637
  f"for {site_type} {sample_name} {ref_label}"
567
638
  )
568
639
  continue
569
640
 
570
- compact_df = df_roll[["center", "n_molecules", "nrl_bp", "snr", "xi", "fwhm_bp"]].copy()
641
+ compact_df = df_roll[
642
+ ["center", "n_molecules", "nrl_bp", "snr", "xi", "fwhm_bp"]
643
+ ].copy()
571
644
  compact_df["site"] = site_type
572
645
  compact_df["sample"] = sample_name
573
646
  compact_df["reference"] = ref_label if ref_label != "all" else "all"
574
647
 
575
648
  if write_csvs:
576
649
  safe_sample = str(sample_name).replace(os.sep, "_")
577
- safe_ref = str(ref_label if ref_label != "all" else "all").replace(os.sep, "_")
650
+ safe_ref = str(ref_label if ref_label != "all" else "all").replace(
651
+ os.sep, "_"
652
+ )
578
653
  out_csv = os.path.join(
579
654
  site_out_dir,
580
655
  f"{safe_sample}__{safe_ref}__rolling_metrics.csv",
@@ -582,7 +657,7 @@ def spatial_adata_core(
582
657
  try:
583
658
  compact_df.to_csv(out_csv, index=False)
584
659
  except Exception as e:
585
- warnings.warn(f"Failed to write rolling CSV {out_csv}: {e}")
660
+ logger.warning(f"Failed to write rolling CSV {out_csv}: {e}")
586
661
 
587
662
  if write_plots:
588
663
  try:
@@ -604,7 +679,7 @@ def spatial_adata_core(
604
679
  show=False,
605
680
  )
606
681
  except Exception as e:
607
- warnings.warn(
682
+ logger.warning(
608
683
  f"Failed to create rolling plot for {site_type} "
609
684
  f"{sample_name} {ref_label}: {e}"
610
685
  )
@@ -612,7 +687,9 @@ def spatial_adata_core(
612
687
  combined_rows.append(
613
688
  compact_df.assign(site=site_type, sample=sample_name, reference=ref_label)
614
689
  )
615
- rolling_results_by_group[(sample_name, None if ref_label == "all" else ref_label)] = compact_df
690
+ rolling_results_by_group[
691
+ (sample_name, None if ref_label == "all" else ref_label)
692
+ ] = compact_df
616
693
 
617
694
  adata.uns[f"{site_type}_rolling_metrics_by_group"] = rolling_results_by_group
618
695
 
@@ -624,9 +701,7 @@ def spatial_adata_core(
624
701
  try:
625
702
  combined_df_site.to_csv(combined_out_csv, index=False)
626
703
  except Exception as e:
627
- warnings.warn(
628
- f"Failed to write combined rolling CSV for {site_type}: {e}"
629
- )
704
+ logger.warning(f"Failed to write combined rolling CSV for {site_type}: {e}")
630
705
 
631
706
  rolling_dict = adata.uns[f"{site_type}_rolling_metrics_by_group"]
632
707
  plot_out_dir = os.path.join(pp_autocorr_dir, "rolling_plots")
@@ -650,6 +725,7 @@ def spatial_adata_core(
650
725
  sample_col=cfg.sample_name_col_for_plotting,
651
726
  window=cfg.autocorr_rolling_window_size,
652
727
  rows_per_fig=cfg.rows_per_qc_autocorr_grid,
728
+ normalization_method=cfg.autocorr_normalization_method,
653
729
  )
654
730
 
655
731
  # ============================================================
@@ -658,7 +734,7 @@ def spatial_adata_core(
658
734
  pp_corr_dir = pp_dir_dedup / "09_correlation_matrices"
659
735
 
660
736
  if pp_corr_dir.is_dir() and not getattr(cfg, "force_redo_spatial_analyses", False):
661
- print(f"{pp_corr_dir} already exists. Skipping correlation matrix plotting.")
737
+ logger.debug(f"{pp_corr_dir} already exists. Skipping correlation matrix plotting.")
662
738
  else:
663
739
  compute_positionwise_statistics(
664
740
  adata,
@@ -691,7 +767,15 @@ def spatial_adata_core(
691
767
  # 5) Save spatial AnnData
692
768
  # ============================================================
693
769
  if (not spatial_adata_path.exists()) or getattr(cfg, "force_redo_spatial_analyses", False):
694
- print("Saving spatial analyzed AnnData (post preprocessing and duplicate removal).")
770
+ logger.info("Saving spatial analyzed AnnData (post preprocessing and duplicate removal).")
771
+ record_smftools_metadata(
772
+ adata,
773
+ step_name="spatial",
774
+ cfg=cfg,
775
+ config_path=config_path,
776
+ input_paths=[source_adata_path] if source_adata_path else None,
777
+ output_path=spatial_adata_path,
778
+ )
695
779
  write_gz_h5ad(adata, spatial_adata_path)
696
780
 
697
- return adata, spatial_adata_path
781
+ return adata, spatial_adata_path