smftools 0.2.3__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff shows the differences between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (137):
  1. smftools/__init__.py +6 -8
  2. smftools/_settings.py +4 -6
  3. smftools/_version.py +1 -1
  4. smftools/cli/helpers.py +54 -0
  5. smftools/cli/hmm_adata.py +937 -256
  6. smftools/cli/load_adata.py +448 -268
  7. smftools/cli/preprocess_adata.py +469 -263
  8. smftools/cli/spatial_adata.py +536 -319
  9. smftools/cli_entry.py +97 -182
  10. smftools/config/__init__.py +1 -1
  11. smftools/config/conversion.yaml +17 -6
  12. smftools/config/deaminase.yaml +12 -10
  13. smftools/config/default.yaml +142 -33
  14. smftools/config/direct.yaml +11 -3
  15. smftools/config/discover_input_files.py +19 -5
  16. smftools/config/experiment_config.py +594 -264
  17. smftools/constants.py +37 -0
  18. smftools/datasets/__init__.py +2 -8
  19. smftools/datasets/datasets.py +32 -18
  20. smftools/hmm/HMM.py +2128 -1418
  21. smftools/hmm/__init__.py +2 -9
  22. smftools/hmm/archived/call_hmm_peaks.py +121 -0
  23. smftools/hmm/call_hmm_peaks.py +299 -91
  24. smftools/hmm/display_hmm.py +19 -6
  25. smftools/hmm/hmm_readwrite.py +13 -4
  26. smftools/hmm/nucleosome_hmm_refinement.py +102 -14
  27. smftools/informatics/__init__.py +30 -7
  28. smftools/informatics/archived/helpers/archived/align_and_sort_BAM.py +14 -1
  29. smftools/informatics/archived/helpers/archived/bam_qc.py +14 -1
  30. smftools/informatics/archived/helpers/archived/concatenate_fastqs_to_bam.py +8 -1
  31. smftools/informatics/archived/helpers/archived/load_adata.py +3 -3
  32. smftools/informatics/archived/helpers/archived/plot_bed_histograms.py +3 -1
  33. smftools/informatics/archived/print_bam_query_seq.py +7 -1
  34. smftools/informatics/bam_functions.py +397 -175
  35. smftools/informatics/basecalling.py +51 -9
  36. smftools/informatics/bed_functions.py +90 -57
  37. smftools/informatics/binarize_converted_base_identities.py +18 -7
  38. smftools/informatics/complement_base_list.py +7 -6
  39. smftools/informatics/converted_BAM_to_adata.py +265 -122
  40. smftools/informatics/fasta_functions.py +161 -83
  41. smftools/informatics/h5ad_functions.py +196 -30
  42. smftools/informatics/modkit_extract_to_adata.py +609 -270
  43. smftools/informatics/modkit_functions.py +85 -44
  44. smftools/informatics/ohe.py +44 -21
  45. smftools/informatics/pod5_functions.py +112 -73
  46. smftools/informatics/run_multiqc.py +20 -14
  47. smftools/logging_utils.py +51 -0
  48. smftools/machine_learning/__init__.py +2 -7
  49. smftools/machine_learning/data/anndata_data_module.py +143 -50
  50. smftools/machine_learning/data/preprocessing.py +2 -1
  51. smftools/machine_learning/evaluation/__init__.py +1 -1
  52. smftools/machine_learning/evaluation/eval_utils.py +11 -14
  53. smftools/machine_learning/evaluation/evaluators.py +46 -33
  54. smftools/machine_learning/inference/__init__.py +1 -1
  55. smftools/machine_learning/inference/inference_utils.py +7 -4
  56. smftools/machine_learning/inference/lightning_inference.py +9 -13
  57. smftools/machine_learning/inference/sklearn_inference.py +6 -8
  58. smftools/machine_learning/inference/sliding_window_inference.py +35 -25
  59. smftools/machine_learning/models/__init__.py +10 -5
  60. smftools/machine_learning/models/base.py +28 -42
  61. smftools/machine_learning/models/cnn.py +15 -11
  62. smftools/machine_learning/models/lightning_base.py +71 -40
  63. smftools/machine_learning/models/mlp.py +13 -4
  64. smftools/machine_learning/models/positional.py +3 -2
  65. smftools/machine_learning/models/rnn.py +3 -2
  66. smftools/machine_learning/models/sklearn_models.py +39 -22
  67. smftools/machine_learning/models/transformer.py +68 -53
  68. smftools/machine_learning/models/wrappers.py +2 -1
  69. smftools/machine_learning/training/__init__.py +2 -2
  70. smftools/machine_learning/training/train_lightning_model.py +29 -20
  71. smftools/machine_learning/training/train_sklearn_model.py +9 -15
  72. smftools/machine_learning/utils/__init__.py +1 -1
  73. smftools/machine_learning/utils/device.py +7 -4
  74. smftools/machine_learning/utils/grl.py +3 -1
  75. smftools/metadata.py +443 -0
  76. smftools/plotting/__init__.py +19 -5
  77. smftools/plotting/autocorrelation_plotting.py +145 -44
  78. smftools/plotting/classifiers.py +162 -72
  79. smftools/plotting/general_plotting.py +422 -197
  80. smftools/plotting/hmm_plotting.py +42 -13
  81. smftools/plotting/position_stats.py +147 -87
  82. smftools/plotting/qc_plotting.py +20 -12
  83. smftools/preprocessing/__init__.py +10 -12
  84. smftools/preprocessing/append_base_context.py +115 -80
  85. smftools/preprocessing/append_binary_layer_by_base_context.py +77 -39
  86. smftools/preprocessing/{calculate_complexity.py → archived/calculate_complexity.py} +3 -1
  87. smftools/preprocessing/{archives → archived}/preprocessing.py +8 -6
  88. smftools/preprocessing/binarize.py +21 -4
  89. smftools/preprocessing/binarize_on_Youden.py +129 -31
  90. smftools/preprocessing/binary_layers_to_ohe.py +17 -11
  91. smftools/preprocessing/calculate_complexity_II.py +86 -59
  92. smftools/preprocessing/calculate_consensus.py +28 -19
  93. smftools/preprocessing/calculate_coverage.py +50 -25
  94. smftools/preprocessing/calculate_pairwise_differences.py +2 -1
  95. smftools/preprocessing/calculate_pairwise_hamming_distances.py +4 -3
  96. smftools/preprocessing/calculate_position_Youden.py +118 -54
  97. smftools/preprocessing/calculate_read_length_stats.py +52 -23
  98. smftools/preprocessing/calculate_read_modification_stats.py +91 -57
  99. smftools/preprocessing/clean_NaN.py +38 -28
  100. smftools/preprocessing/filter_adata_by_nan_proportion.py +24 -12
  101. smftools/preprocessing/filter_reads_on_length_quality_mapping.py +71 -38
  102. smftools/preprocessing/filter_reads_on_modification_thresholds.py +181 -73
  103. smftools/preprocessing/flag_duplicate_reads.py +689 -272
  104. smftools/preprocessing/invert_adata.py +26 -11
  105. smftools/preprocessing/load_sample_sheet.py +40 -22
  106. smftools/preprocessing/make_dirs.py +8 -3
  107. smftools/preprocessing/min_non_diagonal.py +2 -1
  108. smftools/preprocessing/recipes.py +56 -23
  109. smftools/preprocessing/reindex_references_adata.py +103 -0
  110. smftools/preprocessing/subsample_adata.py +33 -16
  111. smftools/readwrite.py +331 -82
  112. smftools/schema/__init__.py +11 -0
  113. smftools/schema/anndata_schema_v1.yaml +227 -0
  114. smftools/tools/__init__.py +3 -4
  115. smftools/tools/archived/classifiers.py +163 -0
  116. smftools/tools/archived/subset_adata_v1.py +10 -1
  117. smftools/tools/archived/subset_adata_v2.py +12 -1
  118. smftools/tools/calculate_umap.py +54 -15
  119. smftools/tools/cluster_adata_on_methylation.py +115 -46
  120. smftools/tools/general_tools.py +70 -25
  121. smftools/tools/position_stats.py +229 -98
  122. smftools/tools/read_stats.py +50 -29
  123. smftools/tools/spatial_autocorrelation.py +365 -192
  124. smftools/tools/subset_adata.py +23 -21
  125. {smftools-0.2.3.dist-info → smftools-0.2.5.dist-info}/METADATA +17 -39
  126. smftools-0.2.5.dist-info/RECORD +181 -0
  127. smftools-0.2.3.dist-info/RECORD +0 -173
  128. /smftools/cli/{cli_flows.py → archived/cli_flows.py} +0 -0
  129. /smftools/hmm/{apply_hmm_batched.py → archived/apply_hmm_batched.py} +0 -0
  130. /smftools/hmm/{calculate_distances.py → archived/calculate_distances.py} +0 -0
  131. /smftools/hmm/{train_hmm.py → archived/train_hmm.py} +0 -0
  132. /smftools/preprocessing/{add_read_length_and_mapping_qc.py → archived/add_read_length_and_mapping_qc.py} +0 -0
  133. /smftools/preprocessing/{archives → archived}/mark_duplicates.py +0 -0
  134. /smftools/preprocessing/{archives → archived}/remove_duplicates.py +0 -0
  135. {smftools-0.2.3.dist-info → smftools-0.2.5.dist-info}/WHEEL +0 -0
  136. {smftools-0.2.3.dist-info → smftools-0.2.5.dist-info}/entry_points.txt +0 -0
  137. {smftools-0.2.3.dist-info → smftools-0.2.5.dist-info}/licenses/LICENSE +0 -0
@@ -1,5 +1,9 @@
1
1
  from typing import Optional
2
2
 
3
+ import numpy as np
4
+ import pandas as pd
5
+
6
+
3
7
  def plot_spatial_autocorr_grid(
4
8
  adata,
5
9
  out_dir: str,
@@ -14,6 +18,7 @@ def plot_spatial_autocorr_grid(
14
18
  references: Optional[list] = None,
15
19
  annotate_periodicity: bool = True,
16
20
  counts_key_suffix: str = "_counts",
21
+ normalization_method: str = "pearson",
17
22
  # plotting thresholds
18
23
  plot_min_count: int = 10,
19
24
  ):
@@ -28,14 +33,15 @@ def plot_spatial_autocorr_grid(
28
33
  fall back to running the analyzer for that group (slow) and cache the result into adata.uns.
29
34
  """
30
35
  import os
31
- import numpy as np
32
- import pandas as pd
33
- import matplotlib.pyplot as plt
34
36
  import warnings
35
37
 
38
+ import matplotlib.pyplot as plt
39
+
36
40
  # Try importing analyzer (used only as fallback)
37
41
  try:
38
- from ..tools.spatial_autocorrelation import analyze_autocorr_matrix # prefer packaged analyzer
42
+ from ..tools.spatial_autocorrelation import (
43
+ analyze_autocorr_matrix,
44
+ ) # prefer packaged analyzer
39
45
  except Exception:
40
46
  analyze_autocorr_matrix = globals().get("analyze_autocorr_matrix", None)
41
47
 
@@ -44,6 +50,7 @@ def plot_spatial_autocorr_grid(
44
50
 
45
51
  # small rolling average helper for smoother visualization
46
52
  def _rolling_1d(arr: np.ndarray, win: int) -> np.ndarray:
53
+ """Compute a rolling mean with NaN-aware normalization."""
47
54
  if win <= 1:
48
55
  return arr
49
56
  valid = np.isfinite(arr).astype(float)
@@ -58,6 +65,7 @@ def plot_spatial_autocorr_grid(
58
65
 
59
66
  # group summary extractor: returns (lags, mean_curve_smoothed, std_curve_smoothed, counts_block_or_None)
60
67
  def _compute_group_summary_for_mask(site: str, mask: np.ndarray):
68
+ """Extract summary curves for a site and mask."""
61
69
  obsm_key = f"{site}_spatial_autocorr"
62
70
  lags_key = f"{site}_spatial_autocorr_lags"
63
71
  counts_key = f"{site}_spatial_autocorr{counts_key_suffix}"
@@ -75,7 +83,12 @@ def plot_spatial_autocorr_grid(
75
83
  if counts_key in adata.obsm:
76
84
  counts_mat = np.asarray(adata.obsm[counts_key])
77
85
  counts = counts_mat[mask, :].astype(int)
78
- return np.asarray(adata.uns[lags_key]), _rolling_1d(mean_per_lag, window), _rolling_1d(std_per_lag, window), counts
86
+ return (
87
+ np.asarray(adata.uns[lags_key]),
88
+ _rolling_1d(mean_per_lag, window),
89
+ _rolling_1d(std_per_lag, window),
90
+ counts,
91
+ )
79
92
 
80
93
  # samples meta
81
94
  if sample_col not in adata.obs:
@@ -116,7 +129,8 @@ def plot_spatial_autocorr_grid(
116
129
  nrows = len(chunk)
117
130
 
118
131
  fig, axes = plt.subplots(
119
- nrows=nrows, ncols=ncols,
132
+ nrows=nrows,
133
+ ncols=ncols,
120
134
  figsize=(4.2 * ncols, 2.4 * nrows),
121
135
  dpi=dpi,
122
136
  squeeze=False,
@@ -141,9 +155,9 @@ def plot_spatial_autocorr_grid(
141
155
  ax = axes[r, col_idx]
142
156
 
143
157
  # compute mask
144
- sample_mask = (adata.obs[sample_col].values == sample_name)
158
+ sample_mask = adata.obs[sample_col].values == sample_name
145
159
  if col_kind == "ref":
146
- ref_mask = (adata.obs[reference_col].values == col_val)
160
+ ref_mask = adata.obs[reference_col].values == col_val
147
161
  mask = sample_mask & ref_mask
148
162
  else:
149
163
  mask = sample_mask
@@ -152,7 +166,9 @@ def plot_spatial_autocorr_grid(
152
166
  n_reads_grp = int(mask.sum())
153
167
 
154
168
  # group summary (mean/std and counts_block)
155
- lags_local, mean_curve, std_curve, counts_block = _compute_group_summary_for_mask(site, mask)
169
+ lags_local, mean_curve, std_curve, counts_block = (
170
+ _compute_group_summary_for_mask(site, mask)
171
+ )
156
172
 
157
173
  # plot title for top row
158
174
  if r == 0:
@@ -164,9 +180,12 @@ def plot_spatial_autocorr_grid(
164
180
  ax.text(0.5, 0.5, "No data", ha="center", va="center", fontsize=8)
165
181
  ax.set_xlim(0, 1)
166
182
  ax.set_xlabel("Lag (bp)", fontsize=7)
167
- ax.tick_params(axis='both', which='major', labelsize=6)
183
+ ax.set_ylabel(
184
+ f"Autocorrelation {normalization_method} normalized", fontsize=7
185
+ )
186
+ ax.tick_params(axis="both", which="major", labelsize=6)
168
187
  ax.grid(True, alpha=0.22)
169
- #col_idx += 1
188
+ # col_idx += 1
170
189
  continue
171
190
 
172
191
  # mask low-support lags if counts available
@@ -186,7 +205,13 @@ def plot_spatial_autocorr_grid(
186
205
 
187
206
  # plot a faint grey line for the low-support regions (context only)
188
207
  if low_support.any():
189
- ax.plot(lags_local[low_support], mean_curve_smooth[low_support], color="0.85", lw=0.6, label="_nolegend_")
208
+ ax.plot(
209
+ lags_local[low_support],
210
+ mean_curve_smooth[low_support],
211
+ color="0.85",
212
+ lw=0.6,
213
+ label="_nolegend_",
214
+ )
190
215
 
191
216
  # plot mean (high-support only) and +/- std (std is computed from all molecules)
192
217
  ax.plot(lags_local, mean_plot, lw=1.1)
@@ -201,16 +226,25 @@ def plot_spatial_autocorr_grid(
201
226
  # metrics_by_group_precomp can be dict-like
202
227
  res = metrics_by_group_precomp.get(group_key, None)
203
228
 
204
- if res is None and annotate_periodicity and (analyze_autocorr_matrix is not None) and (ac_full is not None):
229
+ if (
230
+ res is None
231
+ and annotate_periodicity
232
+ and (analyze_autocorr_matrix is not None)
233
+ and (ac_full is not None)
234
+ ):
205
235
  # fallback: run analyzer on the subset (warn + cache)
206
236
  ac_sel = ac_full[mask, :]
207
237
  cnt_sel = counts_full[mask, :] if counts_full is not None else None
208
238
  if ac_sel.size:
209
- warnings.warn(f"Precomputed periodicity metrics for {site} {group_key} not found — running analyzer as fallback (slow).")
239
+ warnings.warn(
240
+ f"Precomputed periodicity metrics for {site} {group_key} not found — running analyzer as fallback (slow)."
241
+ )
210
242
  try:
211
243
  res = analyze_autocorr_matrix(
212
244
  ac_sel,
213
- cnt_sel if cnt_sel is not None else np.zeros_like(ac_sel, dtype=int),
245
+ cnt_sel
246
+ if cnt_sel is not None
247
+ else np.zeros_like(ac_sel, dtype=int),
214
248
  lags_local,
215
249
  nrl_search_bp=(120, 260),
216
250
  pad_factor=4,
@@ -239,19 +273,38 @@ def plot_spatial_autocorr_grid(
239
273
 
240
274
  # vertical NRL line & harmonics (safe check)
241
275
  if (nrl is not None) and np.isfinite(nrl):
242
- ax.axvline(float(nrl), color="C3", linestyle="--", linewidth=1.0, alpha=0.9)
276
+ ax.axvline(
277
+ float(nrl), color="C3", linestyle="--", linewidth=1.0, alpha=0.9
278
+ )
243
279
  for m in range(2, 5):
244
- ax.axvline(float(nrl) * m, color="C3", linestyle=":", linewidth=0.7, alpha=0.6)
280
+ ax.axvline(
281
+ float(nrl) * m,
282
+ color="C3",
283
+ linestyle=":",
284
+ linewidth=0.7,
285
+ alpha=0.6,
286
+ )
245
287
 
246
288
  # envelope points + fitted exponential
247
289
  if sample_lags.size:
248
290
  ax.scatter(sample_lags, envelope_heights, s=18, color="C2")
249
- if (xi_val is not None) and np.isfinite(xi_val) and np.isfinite(res.get("xi_A", np.nan)):
291
+ if (
292
+ (xi_val is not None)
293
+ and np.isfinite(xi_val)
294
+ and np.isfinite(res.get("xi_A", np.nan))
295
+ ):
250
296
  A = float(res.get("xi_A", np.nan))
251
297
  xi_val = float(xi_val)
252
298
  env_x = np.linspace(np.min(sample_lags), np.max(sample_lags), 200)
253
299
  env_y = A * np.exp(-env_x / xi_val)
254
- ax.plot(env_x, env_y, linestyle="--", color="C2", linewidth=1.0, alpha=0.9)
300
+ ax.plot(
301
+ env_x,
302
+ env_y,
303
+ linestyle="--",
304
+ color="C2",
305
+ linewidth=1.0,
306
+ alpha=0.9,
307
+ )
255
308
 
256
309
  # inset PSD plotted vs NRL (linear x-axis)
257
310
  freqs = res.get("freqs", None)
@@ -266,7 +319,12 @@ def plot_spatial_autocorr_grid(
266
319
  nrl_vals = 1.0 / freqs[valid] # convert freq -> NRL (bp)
267
320
  inset.plot(nrl_vals, power[valid], lw=0.7)
268
321
  if peak_f is not None and peak_f > 0:
269
- inset.axvline(1.0 / float(peak_f), color="C3", linestyle="--", linewidth=0.8)
322
+ inset.axvline(
323
+ 1.0 / float(peak_f),
324
+ color="C3",
325
+ linestyle="--",
326
+ linewidth=0.8,
327
+ )
270
328
  # choose a reasonable linear x-limits (prefer typical NRL range but fallback to data)
271
329
  default_xlim = (60, 400)
272
330
  data_xlim = (float(np.nanmin(nrl_vals)), 600)
@@ -278,17 +336,29 @@ def plot_spatial_autocorr_grid(
278
336
  inset.set_ylabel("power", fontsize=6)
279
337
  inset.tick_params(labelsize=6)
280
338
  if (snr is not None) and np.isfinite(snr):
281
- inset.text(0.95, 0.95, f"SNR={float(snr):.1f}", transform=inset.transAxes,
282
- ha="right", va="top", fontsize=6, bbox=dict(facecolor="white", alpha=0.6, edgecolor="none"))
339
+ inset.text(
340
+ 0.95,
341
+ 0.95,
342
+ f"SNR={float(snr):.1f}",
343
+ transform=inset.transAxes,
344
+ ha="right",
345
+ va="top",
346
+ fontsize=6,
347
+ bbox=dict(facecolor="white", alpha=0.6, edgecolor="none"),
348
+ )
283
349
 
284
350
  # set x-limits based on finite lags
285
351
  finite_mask = np.isfinite(lags_local)
286
352
  if finite_mask.any():
287
- ax.set_xlim(float(np.nanmin(lags_local[finite_mask])), float(np.nanmax(lags_local[finite_mask])))
353
+ ax.set_xlim(
354
+ float(np.nanmin(lags_local[finite_mask])),
355
+ float(np.nanmax(lags_local[finite_mask])),
356
+ )
288
357
 
289
358
  # small cosmetics
290
359
  ax.set_xlabel("Lag (bp)", fontsize=7)
291
- ax.tick_params(axis='both', which='major', labelsize=6)
360
+ ax.set_ylabel(f"Autocorrelation {normalization_method} normalized", fontsize=7)
361
+ ax.tick_params(axis="both", which="major", labelsize=6)
292
362
  ax.grid(True, alpha=0.22)
293
363
 
294
364
  col_idx += 1
@@ -301,9 +371,13 @@ def plot_spatial_autocorr_grid(
301
371
  ycenter = pos.y0 + pos.height / 2.0
302
372
  n_reads_grp = int((adata.obs[sample_col].values == sample_name).sum())
303
373
  label = f"{sample_name}\n(n={n_reads_grp})"
304
- fig.text(0.02, ycenter, label, va='center', ha='left', rotation='vertical', fontsize=9)
374
+ fig.text(0.02, ycenter, label, va="center", ha="left", rotation="vertical", fontsize=9)
305
375
 
306
- fig.suptitle("Spatial autocorrelation by sample × (site_type × reference)", y=0.995, fontsize=11)
376
+ fig.suptitle(
377
+ f"Spatial autocorrelation ({normalization_method}) by sample × (site_type × reference)",
378
+ y=0.995,
379
+ fontsize=11,
380
+ )
307
381
 
308
382
  page_idx = start_idx // rows_per_fig + 1
309
383
  out_png = os.path.join(out_dir, f"{filename_prefix}_page{page_idx}.png")
@@ -365,6 +439,7 @@ def plot_spatial_autocorr_grid(
365
439
  return arr.tolist()
366
440
 
367
441
  def _safe_float(x):
442
+ """Coerce a value to float, returning NaN on failure."""
368
443
  try:
369
444
  return float(x)
370
445
  except Exception:
@@ -381,15 +456,33 @@ def plot_spatial_autocorr_grid(
381
456
  "site": site,
382
457
  "sample": sample_name,
383
458
  "reference": ref,
384
- "nrl_bp": _safe_float(entry.get("nrl_bp", float("nan"))) if entry is not None else float("nan"),
385
- "snr": _safe_float(entry.get("snr", float("nan"))) if entry is not None else float("nan"),
386
- "fwhm_bp": _safe_float(entry.get("fwhm_bp", float("nan"))) if entry is not None else float("nan"),
387
- "xi": _safe_float(entry.get("xi", float("nan"))) if entry is not None else float("nan"),
388
- "xi_A": _safe_float(entry.get("xi_A", float("nan"))) if entry is not None else float("nan"),
389
- "xi_r2": _safe_float(entry.get("xi_r2", float("nan"))) if entry is not None else float("nan"),
390
- "envelope_sample_lags": ";".join(map(str, env_lags_list)) if len(env_lags_list) else "",
391
- "envelope_heights": ";".join(map(str, env_heights_list)) if len(env_heights_list) else "",
392
- "analyzer_error": entry.get("error", entry.get("analyzer_error", None)) if entry is not None else "no_metrics",
459
+ "nrl_bp": _safe_float(entry.get("nrl_bp", float("nan")))
460
+ if entry is not None
461
+ else float("nan"),
462
+ "snr": _safe_float(entry.get("snr", float("nan")))
463
+ if entry is not None
464
+ else float("nan"),
465
+ "fwhm_bp": _safe_float(entry.get("fwhm_bp", float("nan")))
466
+ if entry is not None
467
+ else float("nan"),
468
+ "xi": _safe_float(entry.get("xi", float("nan")))
469
+ if entry is not None
470
+ else float("nan"),
471
+ "xi_A": _safe_float(entry.get("xi_A", float("nan")))
472
+ if entry is not None
473
+ else float("nan"),
474
+ "xi_r2": _safe_float(entry.get("xi_r2", float("nan")))
475
+ if entry is not None
476
+ else float("nan"),
477
+ "envelope_sample_lags": ";".join(map(str, env_lags_list))
478
+ if len(env_lags_list)
479
+ else "",
480
+ "envelope_heights": ";".join(map(str, env_heights_list))
481
+ if len(env_heights_list)
482
+ else "",
483
+ "analyzer_error": entry.get("error", entry.get("analyzer_error", None))
484
+ if entry is not None
485
+ else "no_metrics",
393
486
  }
394
487
  rows.append(row)
395
488
  combined_rows.append(row)
@@ -404,6 +497,7 @@ def plot_spatial_autocorr_grid(
404
497
  except Exception as e:
405
498
  # don't fail the whole pipeline for a single write error; log and continue
406
499
  import warnings
500
+
407
501
  warnings.warn(f"Failed to write {out_csv}: {e}")
408
502
 
409
503
  # write the single combined CSV (one row per sample x ref x site)
@@ -413,16 +507,19 @@ def plot_spatial_autocorr_grid(
413
507
  combined_df.to_csv(combined_out, index=False)
414
508
  except Exception as e:
415
509
  import warnings
510
+
416
511
  warnings.warn(f"Failed to write combined CSV {combined_out}: {e}")
417
512
 
418
513
  return saved_pages
419
514
 
515
+
420
516
  def plot_rolling_metrics(df, out_png=None, title=None, figsize=(10, 3.5), dpi=160, show=False):
421
517
  """
422
518
  Plot NRL and SNR vs window center from the dataframe returned by rolling_autocorr_metrics.
423
519
  If out_png is None, returns the matplotlib Figure object; otherwise saves PNG and returns path.
424
520
  """
425
521
  import matplotlib.pyplot as plt
522
+
426
523
  # sort by center
427
524
  df2 = df.sort_values("center")
428
525
  x = df2["center"].values
@@ -447,15 +544,15 @@ def plot_rolling_metrics(df, out_png=None, title=None, figsize=(10, 3.5), dpi=16
447
544
  fig.savefig(out_png, bbox_inches="tight")
448
545
  if not show:
449
546
  import matplotlib
547
+
450
548
  matplotlib.pyplot.close(fig)
451
549
  return out_png
452
550
  if not show:
453
551
  import matplotlib
552
+
454
553
  matplotlib.pyplot.close(fig)
455
554
  return fig
456
555
 
457
- import numpy as np
458
- import pandas as pd
459
556
 
460
557
  def plot_rolling_grid(
461
558
  rolling_dict,
@@ -502,10 +599,8 @@ def plot_rolling_grid(
502
599
  pages_by_metric : dict mapping metric -> [out_png_paths]
503
600
  """
504
601
  import os
505
- import math
602
+
506
603
  import matplotlib.pyplot as plt
507
- import numpy as np
508
- import pandas as pd
509
604
 
510
605
  if per_metric_ylim is None:
511
606
  per_metric_ylim = {}
@@ -520,7 +615,7 @@ def plot_rolling_grid(
520
615
 
521
616
  # normalize reference labels and keep mapping to original
522
617
  label_to_orig = {}
523
- for (_sample, ref) in keys:
618
+ for _sample, ref in keys:
524
619
  label = "all" if (ref is None) else str(ref)
525
620
  if label not in label_to_orig:
526
621
  label_to_orig[label] = ref
@@ -532,7 +627,11 @@ def plot_rolling_grid(
532
627
  # reference labels ordering
533
628
  default_ref_labels = sorted(label_to_orig.keys(), key=lambda s: s)
534
629
  if reference_order is not None:
535
- ref_labels = [("all" if r is None else str(r)) for r in reference_order if (("all" if r is None else str(r)) in label_to_orig)]
630
+ ref_labels = [
631
+ ("all" if r is None else str(r))
632
+ for r in reference_order
633
+ if (("all" if r is None else str(r)) in label_to_orig)
634
+ ]
536
635
  else:
537
636
  ref_labels = default_ref_labels
538
637
 
@@ -553,9 +652,11 @@ def plot_rolling_grid(
553
652
  nrows = len(page_samples)
554
653
 
555
654
  fig, axes = plt.subplots(
556
- nrows=nrows, ncols=cols_per_page,
655
+ nrows=nrows,
656
+ ncols=cols_per_page,
557
657
  figsize=(figsize_per_panel[0] * cols_per_page, figsize_per_panel[1] * nrows),
558
- dpi=dpi, squeeze=False
658
+ dpi=dpi,
659
+ squeeze=False,
559
660
  )
560
661
 
561
662
  for i, sample in enumerate(page_samples):