smftools 0.2.3__py3-none-any.whl → 0.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- smftools/__init__.py +6 -8
- smftools/_settings.py +4 -6
- smftools/_version.py +1 -1
- smftools/cli/helpers.py +54 -0
- smftools/cli/hmm_adata.py +937 -256
- smftools/cli/load_adata.py +448 -268
- smftools/cli/preprocess_adata.py +469 -263
- smftools/cli/spatial_adata.py +536 -319
- smftools/cli_entry.py +97 -182
- smftools/config/__init__.py +1 -1
- smftools/config/conversion.yaml +17 -6
- smftools/config/deaminase.yaml +12 -10
- smftools/config/default.yaml +142 -33
- smftools/config/direct.yaml +11 -3
- smftools/config/discover_input_files.py +19 -5
- smftools/config/experiment_config.py +594 -264
- smftools/constants.py +37 -0
- smftools/datasets/__init__.py +2 -8
- smftools/datasets/datasets.py +32 -18
- smftools/hmm/HMM.py +2128 -1418
- smftools/hmm/__init__.py +2 -9
- smftools/hmm/archived/call_hmm_peaks.py +121 -0
- smftools/hmm/call_hmm_peaks.py +299 -91
- smftools/hmm/display_hmm.py +19 -6
- smftools/hmm/hmm_readwrite.py +13 -4
- smftools/hmm/nucleosome_hmm_refinement.py +102 -14
- smftools/informatics/__init__.py +30 -7
- smftools/informatics/archived/helpers/archived/align_and_sort_BAM.py +14 -1
- smftools/informatics/archived/helpers/archived/bam_qc.py +14 -1
- smftools/informatics/archived/helpers/archived/concatenate_fastqs_to_bam.py +8 -1
- smftools/informatics/archived/helpers/archived/load_adata.py +3 -3
- smftools/informatics/archived/helpers/archived/plot_bed_histograms.py +3 -1
- smftools/informatics/archived/print_bam_query_seq.py +7 -1
- smftools/informatics/bam_functions.py +397 -175
- smftools/informatics/basecalling.py +51 -9
- smftools/informatics/bed_functions.py +90 -57
- smftools/informatics/binarize_converted_base_identities.py +18 -7
- smftools/informatics/complement_base_list.py +7 -6
- smftools/informatics/converted_BAM_to_adata.py +265 -122
- smftools/informatics/fasta_functions.py +161 -83
- smftools/informatics/h5ad_functions.py +196 -30
- smftools/informatics/modkit_extract_to_adata.py +609 -270
- smftools/informatics/modkit_functions.py +85 -44
- smftools/informatics/ohe.py +44 -21
- smftools/informatics/pod5_functions.py +112 -73
- smftools/informatics/run_multiqc.py +20 -14
- smftools/logging_utils.py +51 -0
- smftools/machine_learning/__init__.py +2 -7
- smftools/machine_learning/data/anndata_data_module.py +143 -50
- smftools/machine_learning/data/preprocessing.py +2 -1
- smftools/machine_learning/evaluation/__init__.py +1 -1
- smftools/machine_learning/evaluation/eval_utils.py +11 -14
- smftools/machine_learning/evaluation/evaluators.py +46 -33
- smftools/machine_learning/inference/__init__.py +1 -1
- smftools/machine_learning/inference/inference_utils.py +7 -4
- smftools/machine_learning/inference/lightning_inference.py +9 -13
- smftools/machine_learning/inference/sklearn_inference.py +6 -8
- smftools/machine_learning/inference/sliding_window_inference.py +35 -25
- smftools/machine_learning/models/__init__.py +10 -5
- smftools/machine_learning/models/base.py +28 -42
- smftools/machine_learning/models/cnn.py +15 -11
- smftools/machine_learning/models/lightning_base.py +71 -40
- smftools/machine_learning/models/mlp.py +13 -4
- smftools/machine_learning/models/positional.py +3 -2
- smftools/machine_learning/models/rnn.py +3 -2
- smftools/machine_learning/models/sklearn_models.py +39 -22
- smftools/machine_learning/models/transformer.py +68 -53
- smftools/machine_learning/models/wrappers.py +2 -1
- smftools/machine_learning/training/__init__.py +2 -2
- smftools/machine_learning/training/train_lightning_model.py +29 -20
- smftools/machine_learning/training/train_sklearn_model.py +9 -15
- smftools/machine_learning/utils/__init__.py +1 -1
- smftools/machine_learning/utils/device.py +7 -4
- smftools/machine_learning/utils/grl.py +3 -1
- smftools/metadata.py +443 -0
- smftools/plotting/__init__.py +19 -5
- smftools/plotting/autocorrelation_plotting.py +145 -44
- smftools/plotting/classifiers.py +162 -72
- smftools/plotting/general_plotting.py +422 -197
- smftools/plotting/hmm_plotting.py +42 -13
- smftools/plotting/position_stats.py +147 -87
- smftools/plotting/qc_plotting.py +20 -12
- smftools/preprocessing/__init__.py +10 -12
- smftools/preprocessing/append_base_context.py +115 -80
- smftools/preprocessing/append_binary_layer_by_base_context.py +77 -39
- smftools/preprocessing/{calculate_complexity.py → archived/calculate_complexity.py} +3 -1
- smftools/preprocessing/{archives → archived}/preprocessing.py +8 -6
- smftools/preprocessing/binarize.py +21 -4
- smftools/preprocessing/binarize_on_Youden.py +129 -31
- smftools/preprocessing/binary_layers_to_ohe.py +17 -11
- smftools/preprocessing/calculate_complexity_II.py +86 -59
- smftools/preprocessing/calculate_consensus.py +28 -19
- smftools/preprocessing/calculate_coverage.py +50 -25
- smftools/preprocessing/calculate_pairwise_differences.py +2 -1
- smftools/preprocessing/calculate_pairwise_hamming_distances.py +4 -3
- smftools/preprocessing/calculate_position_Youden.py +118 -54
- smftools/preprocessing/calculate_read_length_stats.py +52 -23
- smftools/preprocessing/calculate_read_modification_stats.py +91 -57
- smftools/preprocessing/clean_NaN.py +38 -28
- smftools/preprocessing/filter_adata_by_nan_proportion.py +24 -12
- smftools/preprocessing/filter_reads_on_length_quality_mapping.py +71 -38
- smftools/preprocessing/filter_reads_on_modification_thresholds.py +181 -73
- smftools/preprocessing/flag_duplicate_reads.py +689 -272
- smftools/preprocessing/invert_adata.py +26 -11
- smftools/preprocessing/load_sample_sheet.py +40 -22
- smftools/preprocessing/make_dirs.py +8 -3
- smftools/preprocessing/min_non_diagonal.py +2 -1
- smftools/preprocessing/recipes.py +56 -23
- smftools/preprocessing/reindex_references_adata.py +103 -0
- smftools/preprocessing/subsample_adata.py +33 -16
- smftools/readwrite.py +331 -82
- smftools/schema/__init__.py +11 -0
- smftools/schema/anndata_schema_v1.yaml +227 -0
- smftools/tools/__init__.py +3 -4
- smftools/tools/archived/classifiers.py +163 -0
- smftools/tools/archived/subset_adata_v1.py +10 -1
- smftools/tools/archived/subset_adata_v2.py +12 -1
- smftools/tools/calculate_umap.py +54 -15
- smftools/tools/cluster_adata_on_methylation.py +115 -46
- smftools/tools/general_tools.py +70 -25
- smftools/tools/position_stats.py +229 -98
- smftools/tools/read_stats.py +50 -29
- smftools/tools/spatial_autocorrelation.py +365 -192
- smftools/tools/subset_adata.py +23 -21
- {smftools-0.2.3.dist-info → smftools-0.2.5.dist-info}/METADATA +17 -39
- smftools-0.2.5.dist-info/RECORD +181 -0
- smftools-0.2.3.dist-info/RECORD +0 -173
- /smftools/cli/{cli_flows.py → archived/cli_flows.py} +0 -0
- /smftools/hmm/{apply_hmm_batched.py → archived/apply_hmm_batched.py} +0 -0
- /smftools/hmm/{calculate_distances.py → archived/calculate_distances.py} +0 -0
- /smftools/hmm/{train_hmm.py → archived/train_hmm.py} +0 -0
- /smftools/preprocessing/{add_read_length_and_mapping_qc.py → archived/add_read_length_and_mapping_qc.py} +0 -0
- /smftools/preprocessing/{archives → archived}/mark_duplicates.py +0 -0
- /smftools/preprocessing/{archives → archived}/remove_duplicates.py +0 -0
- {smftools-0.2.3.dist-info → smftools-0.2.5.dist-info}/WHEEL +0 -0
- {smftools-0.2.3.dist-info → smftools-0.2.5.dist-info}/entry_points.txt +0 -0
- {smftools-0.2.3.dist-info → smftools-0.2.5.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,5 +1,9 @@
|
|
|
1
1
|
from typing import Optional
|
|
2
2
|
|
|
3
|
+
import numpy as np
|
|
4
|
+
import pandas as pd
|
|
5
|
+
|
|
6
|
+
|
|
3
7
|
def plot_spatial_autocorr_grid(
|
|
4
8
|
adata,
|
|
5
9
|
out_dir: str,
|
|
@@ -14,6 +18,7 @@ def plot_spatial_autocorr_grid(
|
|
|
14
18
|
references: Optional[list] = None,
|
|
15
19
|
annotate_periodicity: bool = True,
|
|
16
20
|
counts_key_suffix: str = "_counts",
|
|
21
|
+
normalization_method: str = "pearson",
|
|
17
22
|
# plotting thresholds
|
|
18
23
|
plot_min_count: int = 10,
|
|
19
24
|
):
|
|
@@ -28,14 +33,15 @@ def plot_spatial_autocorr_grid(
|
|
|
28
33
|
fall back to running the analyzer for that group (slow) and cache the result into adata.uns.
|
|
29
34
|
"""
|
|
30
35
|
import os
|
|
31
|
-
import numpy as np
|
|
32
|
-
import pandas as pd
|
|
33
|
-
import matplotlib.pyplot as plt
|
|
34
36
|
import warnings
|
|
35
37
|
|
|
38
|
+
import matplotlib.pyplot as plt
|
|
39
|
+
|
|
36
40
|
# Try importing analyzer (used only as fallback)
|
|
37
41
|
try:
|
|
38
|
-
from ..tools.spatial_autocorrelation import
|
|
42
|
+
from ..tools.spatial_autocorrelation import (
|
|
43
|
+
analyze_autocorr_matrix,
|
|
44
|
+
) # prefer packaged analyzer
|
|
39
45
|
except Exception:
|
|
40
46
|
analyze_autocorr_matrix = globals().get("analyze_autocorr_matrix", None)
|
|
41
47
|
|
|
@@ -44,6 +50,7 @@ def plot_spatial_autocorr_grid(
|
|
|
44
50
|
|
|
45
51
|
# small rolling average helper for smoother visualization
|
|
46
52
|
def _rolling_1d(arr: np.ndarray, win: int) -> np.ndarray:
|
|
53
|
+
"""Compute a rolling mean with NaN-aware normalization."""
|
|
47
54
|
if win <= 1:
|
|
48
55
|
return arr
|
|
49
56
|
valid = np.isfinite(arr).astype(float)
|
|
@@ -58,6 +65,7 @@ def plot_spatial_autocorr_grid(
|
|
|
58
65
|
|
|
59
66
|
# group summary extractor: returns (lags, mean_curve_smoothed, std_curve_smoothed, counts_block_or_None)
|
|
60
67
|
def _compute_group_summary_for_mask(site: str, mask: np.ndarray):
|
|
68
|
+
"""Extract summary curves for a site and mask."""
|
|
61
69
|
obsm_key = f"{site}_spatial_autocorr"
|
|
62
70
|
lags_key = f"{site}_spatial_autocorr_lags"
|
|
63
71
|
counts_key = f"{site}_spatial_autocorr{counts_key_suffix}"
|
|
@@ -75,7 +83,12 @@ def plot_spatial_autocorr_grid(
|
|
|
75
83
|
if counts_key in adata.obsm:
|
|
76
84
|
counts_mat = np.asarray(adata.obsm[counts_key])
|
|
77
85
|
counts = counts_mat[mask, :].astype(int)
|
|
78
|
-
return
|
|
86
|
+
return (
|
|
87
|
+
np.asarray(adata.uns[lags_key]),
|
|
88
|
+
_rolling_1d(mean_per_lag, window),
|
|
89
|
+
_rolling_1d(std_per_lag, window),
|
|
90
|
+
counts,
|
|
91
|
+
)
|
|
79
92
|
|
|
80
93
|
# samples meta
|
|
81
94
|
if sample_col not in adata.obs:
|
|
@@ -116,7 +129,8 @@ def plot_spatial_autocorr_grid(
|
|
|
116
129
|
nrows = len(chunk)
|
|
117
130
|
|
|
118
131
|
fig, axes = plt.subplots(
|
|
119
|
-
nrows=nrows,
|
|
132
|
+
nrows=nrows,
|
|
133
|
+
ncols=ncols,
|
|
120
134
|
figsize=(4.2 * ncols, 2.4 * nrows),
|
|
121
135
|
dpi=dpi,
|
|
122
136
|
squeeze=False,
|
|
@@ -141,9 +155,9 @@ def plot_spatial_autocorr_grid(
|
|
|
141
155
|
ax = axes[r, col_idx]
|
|
142
156
|
|
|
143
157
|
# compute mask
|
|
144
|
-
sample_mask =
|
|
158
|
+
sample_mask = adata.obs[sample_col].values == sample_name
|
|
145
159
|
if col_kind == "ref":
|
|
146
|
-
ref_mask =
|
|
160
|
+
ref_mask = adata.obs[reference_col].values == col_val
|
|
147
161
|
mask = sample_mask & ref_mask
|
|
148
162
|
else:
|
|
149
163
|
mask = sample_mask
|
|
@@ -152,7 +166,9 @@ def plot_spatial_autocorr_grid(
|
|
|
152
166
|
n_reads_grp = int(mask.sum())
|
|
153
167
|
|
|
154
168
|
# group summary (mean/std and counts_block)
|
|
155
|
-
lags_local, mean_curve, std_curve, counts_block =
|
|
169
|
+
lags_local, mean_curve, std_curve, counts_block = (
|
|
170
|
+
_compute_group_summary_for_mask(site, mask)
|
|
171
|
+
)
|
|
156
172
|
|
|
157
173
|
# plot title for top row
|
|
158
174
|
if r == 0:
|
|
@@ -164,9 +180,12 @@ def plot_spatial_autocorr_grid(
|
|
|
164
180
|
ax.text(0.5, 0.5, "No data", ha="center", va="center", fontsize=8)
|
|
165
181
|
ax.set_xlim(0, 1)
|
|
166
182
|
ax.set_xlabel("Lag (bp)", fontsize=7)
|
|
167
|
-
ax.
|
|
183
|
+
ax.set_ylabel(
|
|
184
|
+
f"Autocorrelation {normalization_method} normalized", fontsize=7
|
|
185
|
+
)
|
|
186
|
+
ax.tick_params(axis="both", which="major", labelsize=6)
|
|
168
187
|
ax.grid(True, alpha=0.22)
|
|
169
|
-
#col_idx += 1
|
|
188
|
+
# col_idx += 1
|
|
170
189
|
continue
|
|
171
190
|
|
|
172
191
|
# mask low-support lags if counts available
|
|
@@ -186,7 +205,13 @@ def plot_spatial_autocorr_grid(
|
|
|
186
205
|
|
|
187
206
|
# plot a faint grey line for the low-support regions (context only)
|
|
188
207
|
if low_support.any():
|
|
189
|
-
ax.plot(
|
|
208
|
+
ax.plot(
|
|
209
|
+
lags_local[low_support],
|
|
210
|
+
mean_curve_smooth[low_support],
|
|
211
|
+
color="0.85",
|
|
212
|
+
lw=0.6,
|
|
213
|
+
label="_nolegend_",
|
|
214
|
+
)
|
|
190
215
|
|
|
191
216
|
# plot mean (high-support only) and +/- std (std is computed from all molecules)
|
|
192
217
|
ax.plot(lags_local, mean_plot, lw=1.1)
|
|
@@ -201,16 +226,25 @@ def plot_spatial_autocorr_grid(
|
|
|
201
226
|
# metrics_by_group_precomp can be dict-like
|
|
202
227
|
res = metrics_by_group_precomp.get(group_key, None)
|
|
203
228
|
|
|
204
|
-
if
|
|
229
|
+
if (
|
|
230
|
+
res is None
|
|
231
|
+
and annotate_periodicity
|
|
232
|
+
and (analyze_autocorr_matrix is not None)
|
|
233
|
+
and (ac_full is not None)
|
|
234
|
+
):
|
|
205
235
|
# fallback: run analyzer on the subset (warn + cache)
|
|
206
236
|
ac_sel = ac_full[mask, :]
|
|
207
237
|
cnt_sel = counts_full[mask, :] if counts_full is not None else None
|
|
208
238
|
if ac_sel.size:
|
|
209
|
-
warnings.warn(
|
|
239
|
+
warnings.warn(
|
|
240
|
+
f"Precomputed periodicity metrics for {site} {group_key} not found — running analyzer as fallback (slow)."
|
|
241
|
+
)
|
|
210
242
|
try:
|
|
211
243
|
res = analyze_autocorr_matrix(
|
|
212
244
|
ac_sel,
|
|
213
|
-
cnt_sel
|
|
245
|
+
cnt_sel
|
|
246
|
+
if cnt_sel is not None
|
|
247
|
+
else np.zeros_like(ac_sel, dtype=int),
|
|
214
248
|
lags_local,
|
|
215
249
|
nrl_search_bp=(120, 260),
|
|
216
250
|
pad_factor=4,
|
|
@@ -239,19 +273,38 @@ def plot_spatial_autocorr_grid(
|
|
|
239
273
|
|
|
240
274
|
# vertical NRL line & harmonics (safe check)
|
|
241
275
|
if (nrl is not None) and np.isfinite(nrl):
|
|
242
|
-
ax.axvline(
|
|
276
|
+
ax.axvline(
|
|
277
|
+
float(nrl), color="C3", linestyle="--", linewidth=1.0, alpha=0.9
|
|
278
|
+
)
|
|
243
279
|
for m in range(2, 5):
|
|
244
|
-
ax.axvline(
|
|
280
|
+
ax.axvline(
|
|
281
|
+
float(nrl) * m,
|
|
282
|
+
color="C3",
|
|
283
|
+
linestyle=":",
|
|
284
|
+
linewidth=0.7,
|
|
285
|
+
alpha=0.6,
|
|
286
|
+
)
|
|
245
287
|
|
|
246
288
|
# envelope points + fitted exponential
|
|
247
289
|
if sample_lags.size:
|
|
248
290
|
ax.scatter(sample_lags, envelope_heights, s=18, color="C2")
|
|
249
|
-
if (
|
|
291
|
+
if (
|
|
292
|
+
(xi_val is not None)
|
|
293
|
+
and np.isfinite(xi_val)
|
|
294
|
+
and np.isfinite(res.get("xi_A", np.nan))
|
|
295
|
+
):
|
|
250
296
|
A = float(res.get("xi_A", np.nan))
|
|
251
297
|
xi_val = float(xi_val)
|
|
252
298
|
env_x = np.linspace(np.min(sample_lags), np.max(sample_lags), 200)
|
|
253
299
|
env_y = A * np.exp(-env_x / xi_val)
|
|
254
|
-
ax.plot(
|
|
300
|
+
ax.plot(
|
|
301
|
+
env_x,
|
|
302
|
+
env_y,
|
|
303
|
+
linestyle="--",
|
|
304
|
+
color="C2",
|
|
305
|
+
linewidth=1.0,
|
|
306
|
+
alpha=0.9,
|
|
307
|
+
)
|
|
255
308
|
|
|
256
309
|
# inset PSD plotted vs NRL (linear x-axis)
|
|
257
310
|
freqs = res.get("freqs", None)
|
|
@@ -266,7 +319,12 @@ def plot_spatial_autocorr_grid(
|
|
|
266
319
|
nrl_vals = 1.0 / freqs[valid] # convert freq -> NRL (bp)
|
|
267
320
|
inset.plot(nrl_vals, power[valid], lw=0.7)
|
|
268
321
|
if peak_f is not None and peak_f > 0:
|
|
269
|
-
inset.axvline(
|
|
322
|
+
inset.axvline(
|
|
323
|
+
1.0 / float(peak_f),
|
|
324
|
+
color="C3",
|
|
325
|
+
linestyle="--",
|
|
326
|
+
linewidth=0.8,
|
|
327
|
+
)
|
|
270
328
|
# choose a reasonable linear x-limits (prefer typical NRL range but fallback to data)
|
|
271
329
|
default_xlim = (60, 400)
|
|
272
330
|
data_xlim = (float(np.nanmin(nrl_vals)), 600)
|
|
@@ -278,17 +336,29 @@ def plot_spatial_autocorr_grid(
|
|
|
278
336
|
inset.set_ylabel("power", fontsize=6)
|
|
279
337
|
inset.tick_params(labelsize=6)
|
|
280
338
|
if (snr is not None) and np.isfinite(snr):
|
|
281
|
-
inset.text(
|
|
282
|
-
|
|
339
|
+
inset.text(
|
|
340
|
+
0.95,
|
|
341
|
+
0.95,
|
|
342
|
+
f"SNR={float(snr):.1f}",
|
|
343
|
+
transform=inset.transAxes,
|
|
344
|
+
ha="right",
|
|
345
|
+
va="top",
|
|
346
|
+
fontsize=6,
|
|
347
|
+
bbox=dict(facecolor="white", alpha=0.6, edgecolor="none"),
|
|
348
|
+
)
|
|
283
349
|
|
|
284
350
|
# set x-limits based on finite lags
|
|
285
351
|
finite_mask = np.isfinite(lags_local)
|
|
286
352
|
if finite_mask.any():
|
|
287
|
-
ax.set_xlim(
|
|
353
|
+
ax.set_xlim(
|
|
354
|
+
float(np.nanmin(lags_local[finite_mask])),
|
|
355
|
+
float(np.nanmax(lags_local[finite_mask])),
|
|
356
|
+
)
|
|
288
357
|
|
|
289
358
|
# small cosmetics
|
|
290
359
|
ax.set_xlabel("Lag (bp)", fontsize=7)
|
|
291
|
-
ax.
|
|
360
|
+
ax.set_ylabel(f"Autocorrelation {normalization_method} normalized", fontsize=7)
|
|
361
|
+
ax.tick_params(axis="both", which="major", labelsize=6)
|
|
292
362
|
ax.grid(True, alpha=0.22)
|
|
293
363
|
|
|
294
364
|
col_idx += 1
|
|
@@ -301,9 +371,13 @@ def plot_spatial_autocorr_grid(
|
|
|
301
371
|
ycenter = pos.y0 + pos.height / 2.0
|
|
302
372
|
n_reads_grp = int((adata.obs[sample_col].values == sample_name).sum())
|
|
303
373
|
label = f"{sample_name}\n(n={n_reads_grp})"
|
|
304
|
-
fig.text(0.02, ycenter, label, va=
|
|
374
|
+
fig.text(0.02, ycenter, label, va="center", ha="left", rotation="vertical", fontsize=9)
|
|
305
375
|
|
|
306
|
-
fig.suptitle(
|
|
376
|
+
fig.suptitle(
|
|
377
|
+
f"Spatial autocorrelation ({normalization_method}) by sample × (site_type × reference)",
|
|
378
|
+
y=0.995,
|
|
379
|
+
fontsize=11,
|
|
380
|
+
)
|
|
307
381
|
|
|
308
382
|
page_idx = start_idx // rows_per_fig + 1
|
|
309
383
|
out_png = os.path.join(out_dir, f"{filename_prefix}_page{page_idx}.png")
|
|
@@ -365,6 +439,7 @@ def plot_spatial_autocorr_grid(
|
|
|
365
439
|
return arr.tolist()
|
|
366
440
|
|
|
367
441
|
def _safe_float(x):
|
|
442
|
+
"""Coerce a value to float, returning NaN on failure."""
|
|
368
443
|
try:
|
|
369
444
|
return float(x)
|
|
370
445
|
except Exception:
|
|
@@ -381,15 +456,33 @@ def plot_spatial_autocorr_grid(
|
|
|
381
456
|
"site": site,
|
|
382
457
|
"sample": sample_name,
|
|
383
458
|
"reference": ref,
|
|
384
|
-
"nrl_bp": _safe_float(entry.get("nrl_bp", float("nan")))
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
"
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
"
|
|
391
|
-
|
|
392
|
-
|
|
459
|
+
"nrl_bp": _safe_float(entry.get("nrl_bp", float("nan")))
|
|
460
|
+
if entry is not None
|
|
461
|
+
else float("nan"),
|
|
462
|
+
"snr": _safe_float(entry.get("snr", float("nan")))
|
|
463
|
+
if entry is not None
|
|
464
|
+
else float("nan"),
|
|
465
|
+
"fwhm_bp": _safe_float(entry.get("fwhm_bp", float("nan")))
|
|
466
|
+
if entry is not None
|
|
467
|
+
else float("nan"),
|
|
468
|
+
"xi": _safe_float(entry.get("xi", float("nan")))
|
|
469
|
+
if entry is not None
|
|
470
|
+
else float("nan"),
|
|
471
|
+
"xi_A": _safe_float(entry.get("xi_A", float("nan")))
|
|
472
|
+
if entry is not None
|
|
473
|
+
else float("nan"),
|
|
474
|
+
"xi_r2": _safe_float(entry.get("xi_r2", float("nan")))
|
|
475
|
+
if entry is not None
|
|
476
|
+
else float("nan"),
|
|
477
|
+
"envelope_sample_lags": ";".join(map(str, env_lags_list))
|
|
478
|
+
if len(env_lags_list)
|
|
479
|
+
else "",
|
|
480
|
+
"envelope_heights": ";".join(map(str, env_heights_list))
|
|
481
|
+
if len(env_heights_list)
|
|
482
|
+
else "",
|
|
483
|
+
"analyzer_error": entry.get("error", entry.get("analyzer_error", None))
|
|
484
|
+
if entry is not None
|
|
485
|
+
else "no_metrics",
|
|
393
486
|
}
|
|
394
487
|
rows.append(row)
|
|
395
488
|
combined_rows.append(row)
|
|
@@ -404,6 +497,7 @@ def plot_spatial_autocorr_grid(
|
|
|
404
497
|
except Exception as e:
|
|
405
498
|
# don't fail the whole pipeline for a single write error; log and continue
|
|
406
499
|
import warnings
|
|
500
|
+
|
|
407
501
|
warnings.warn(f"Failed to write {out_csv}: {e}")
|
|
408
502
|
|
|
409
503
|
# write the single combined CSV (one row per sample x ref x site)
|
|
@@ -413,16 +507,19 @@ def plot_spatial_autocorr_grid(
|
|
|
413
507
|
combined_df.to_csv(combined_out, index=False)
|
|
414
508
|
except Exception as e:
|
|
415
509
|
import warnings
|
|
510
|
+
|
|
416
511
|
warnings.warn(f"Failed to write combined CSV {combined_out}: {e}")
|
|
417
512
|
|
|
418
513
|
return saved_pages
|
|
419
514
|
|
|
515
|
+
|
|
420
516
|
def plot_rolling_metrics(df, out_png=None, title=None, figsize=(10, 3.5), dpi=160, show=False):
|
|
421
517
|
"""
|
|
422
518
|
Plot NRL and SNR vs window center from the dataframe returned by rolling_autocorr_metrics.
|
|
423
519
|
If out_png is None, returns the matplotlib Figure object; otherwise saves PNG and returns path.
|
|
424
520
|
"""
|
|
425
521
|
import matplotlib.pyplot as plt
|
|
522
|
+
|
|
426
523
|
# sort by center
|
|
427
524
|
df2 = df.sort_values("center")
|
|
428
525
|
x = df2["center"].values
|
|
@@ -447,15 +544,15 @@ def plot_rolling_metrics(df, out_png=None, title=None, figsize=(10, 3.5), dpi=16
|
|
|
447
544
|
fig.savefig(out_png, bbox_inches="tight")
|
|
448
545
|
if not show:
|
|
449
546
|
import matplotlib
|
|
547
|
+
|
|
450
548
|
matplotlib.pyplot.close(fig)
|
|
451
549
|
return out_png
|
|
452
550
|
if not show:
|
|
453
551
|
import matplotlib
|
|
552
|
+
|
|
454
553
|
matplotlib.pyplot.close(fig)
|
|
455
554
|
return fig
|
|
456
555
|
|
|
457
|
-
import numpy as np
|
|
458
|
-
import pandas as pd
|
|
459
556
|
|
|
460
557
|
def plot_rolling_grid(
|
|
461
558
|
rolling_dict,
|
|
@@ -502,10 +599,8 @@ def plot_rolling_grid(
|
|
|
502
599
|
pages_by_metric : dict mapping metric -> [out_png_paths]
|
|
503
600
|
"""
|
|
504
601
|
import os
|
|
505
|
-
|
|
602
|
+
|
|
506
603
|
import matplotlib.pyplot as plt
|
|
507
|
-
import numpy as np
|
|
508
|
-
import pandas as pd
|
|
509
604
|
|
|
510
605
|
if per_metric_ylim is None:
|
|
511
606
|
per_metric_ylim = {}
|
|
@@ -520,7 +615,7 @@ def plot_rolling_grid(
|
|
|
520
615
|
|
|
521
616
|
# normalize reference labels and keep mapping to original
|
|
522
617
|
label_to_orig = {}
|
|
523
|
-
for
|
|
618
|
+
for _sample, ref in keys:
|
|
524
619
|
label = "all" if (ref is None) else str(ref)
|
|
525
620
|
if label not in label_to_orig:
|
|
526
621
|
label_to_orig[label] = ref
|
|
@@ -532,7 +627,11 @@ def plot_rolling_grid(
|
|
|
532
627
|
# reference labels ordering
|
|
533
628
|
default_ref_labels = sorted(label_to_orig.keys(), key=lambda s: s)
|
|
534
629
|
if reference_order is not None:
|
|
535
|
-
ref_labels = [
|
|
630
|
+
ref_labels = [
|
|
631
|
+
("all" if r is None else str(r))
|
|
632
|
+
for r in reference_order
|
|
633
|
+
if (("all" if r is None else str(r)) in label_to_orig)
|
|
634
|
+
]
|
|
536
635
|
else:
|
|
537
636
|
ref_labels = default_ref_labels
|
|
538
637
|
|
|
@@ -553,9 +652,11 @@ def plot_rolling_grid(
|
|
|
553
652
|
nrows = len(page_samples)
|
|
554
653
|
|
|
555
654
|
fig, axes = plt.subplots(
|
|
556
|
-
nrows=nrows,
|
|
655
|
+
nrows=nrows,
|
|
656
|
+
ncols=cols_per_page,
|
|
557
657
|
figsize=(figsize_per_panel[0] * cols_per_page, figsize_per_panel[1] * nrows),
|
|
558
|
-
dpi=dpi,
|
|
658
|
+
dpi=dpi,
|
|
659
|
+
squeeze=False,
|
|
559
660
|
)
|
|
560
661
|
|
|
561
662
|
for i, sample in enumerate(page_samples):
|