smftools 0.2.5__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- smftools/__init__.py +39 -7
- smftools/_settings.py +2 -0
- smftools/_version.py +3 -1
- smftools/cli/__init__.py +1 -0
- smftools/cli/archived/cli_flows.py +2 -0
- smftools/cli/helpers.py +2 -0
- smftools/cli/hmm_adata.py +7 -2
- smftools/cli/load_adata.py +130 -98
- smftools/cli/preprocess_adata.py +2 -0
- smftools/cli/spatial_adata.py +5 -1
- smftools/cli_entry.py +26 -1
- smftools/config/__init__.py +2 -0
- smftools/config/default.yaml +4 -1
- smftools/config/experiment_config.py +6 -0
- smftools/datasets/__init__.py +2 -0
- smftools/hmm/HMM.py +9 -3
- smftools/hmm/__init__.py +24 -13
- smftools/hmm/archived/apply_hmm_batched.py +2 -0
- smftools/hmm/archived/calculate_distances.py +2 -0
- smftools/hmm/archived/call_hmm_peaks.py +2 -0
- smftools/hmm/archived/train_hmm.py +2 -0
- smftools/hmm/call_hmm_peaks.py +5 -2
- smftools/hmm/display_hmm.py +4 -1
- smftools/hmm/hmm_readwrite.py +7 -2
- smftools/hmm/nucleosome_hmm_refinement.py +2 -0
- smftools/informatics/__init__.py +53 -34
- smftools/informatics/archived/bam_conversion.py +2 -0
- smftools/informatics/archived/bam_direct.py +2 -0
- smftools/informatics/archived/basecall_pod5s.py +2 -0
- smftools/informatics/archived/basecalls_to_adata.py +2 -0
- smftools/informatics/archived/conversion_smf.py +2 -0
- smftools/informatics/archived/deaminase_smf.py +1 -0
- smftools/informatics/archived/direct_smf.py +2 -0
- smftools/informatics/archived/fast5_to_pod5.py +2 -0
- smftools/informatics/archived/helpers/archived/__init__.py +2 -0
- smftools/informatics/archived/helpers/archived/align_and_sort_BAM.py +2 -0
- smftools/informatics/archived/helpers/archived/aligned_BAM_to_bed.py +2 -0
- smftools/informatics/archived/helpers/archived/bed_to_bigwig.py +2 -0
- smftools/informatics/archived/helpers/archived/canoncall.py +2 -0
- smftools/informatics/archived/helpers/archived/converted_BAM_to_adata.py +2 -0
- smftools/informatics/archived/helpers/archived/count_aligned_reads.py +2 -0
- smftools/informatics/archived/helpers/archived/demux_and_index_BAM.py +2 -0
- smftools/informatics/archived/helpers/archived/extract_base_identities.py +2 -0
- smftools/informatics/archived/helpers/archived/extract_mods.py +2 -0
- smftools/informatics/archived/helpers/archived/extract_read_features_from_bam.py +2 -0
- smftools/informatics/archived/helpers/archived/extract_read_lengths_from_bed.py +2 -0
- smftools/informatics/archived/helpers/archived/extract_readnames_from_BAM.py +2 -0
- smftools/informatics/archived/helpers/archived/find_conversion_sites.py +2 -0
- smftools/informatics/archived/helpers/archived/generate_converted_FASTA.py +2 -0
- smftools/informatics/archived/helpers/archived/get_chromosome_lengths.py +2 -0
- smftools/informatics/archived/helpers/archived/get_native_references.py +2 -0
- smftools/informatics/archived/helpers/archived/index_fasta.py +2 -0
- smftools/informatics/archived/helpers/archived/informatics.py +2 -0
- smftools/informatics/archived/helpers/archived/load_adata.py +2 -0
- smftools/informatics/archived/helpers/archived/make_modbed.py +2 -0
- smftools/informatics/archived/helpers/archived/modQC.py +2 -0
- smftools/informatics/archived/helpers/archived/modcall.py +2 -0
- smftools/informatics/archived/helpers/archived/ohe_batching.py +2 -0
- smftools/informatics/archived/helpers/archived/ohe_layers_decode.py +2 -0
- smftools/informatics/archived/helpers/archived/one_hot_decode.py +2 -0
- smftools/informatics/archived/helpers/archived/one_hot_encode.py +2 -0
- smftools/informatics/archived/helpers/archived/plot_bed_histograms.py +2 -0
- smftools/informatics/archived/helpers/archived/separate_bam_by_bc.py +2 -0
- smftools/informatics/archived/helpers/archived/split_and_index_BAM.py +2 -0
- smftools/informatics/archived/print_bam_query_seq.py +2 -0
- smftools/informatics/archived/subsample_fasta_from_bed.py +2 -0
- smftools/informatics/archived/subsample_pod5.py +2 -0
- smftools/informatics/bam_functions.py +737 -170
- smftools/informatics/basecalling.py +2 -0
- smftools/informatics/bed_functions.py +271 -61
- smftools/informatics/binarize_converted_base_identities.py +3 -0
- smftools/informatics/complement_base_list.py +2 -0
- smftools/informatics/converted_BAM_to_adata.py +66 -22
- smftools/informatics/fasta_functions.py +94 -10
- smftools/informatics/h5ad_functions.py +8 -2
- smftools/informatics/modkit_extract_to_adata.py +16 -6
- smftools/informatics/modkit_functions.py +2 -0
- smftools/informatics/ohe.py +2 -0
- smftools/informatics/pod5_functions.py +3 -2
- smftools/machine_learning/__init__.py +22 -6
- smftools/machine_learning/data/__init__.py +2 -0
- smftools/machine_learning/data/anndata_data_module.py +18 -4
- smftools/machine_learning/data/preprocessing.py +2 -0
- smftools/machine_learning/evaluation/__init__.py +2 -0
- smftools/machine_learning/evaluation/eval_utils.py +2 -0
- smftools/machine_learning/evaluation/evaluators.py +14 -9
- smftools/machine_learning/inference/__init__.py +2 -0
- smftools/machine_learning/inference/inference_utils.py +2 -0
- smftools/machine_learning/inference/lightning_inference.py +6 -1
- smftools/machine_learning/inference/sklearn_inference.py +2 -0
- smftools/machine_learning/inference/sliding_window_inference.py +2 -0
- smftools/machine_learning/models/__init__.py +2 -0
- smftools/machine_learning/models/base.py +7 -2
- smftools/machine_learning/models/cnn.py +7 -2
- smftools/machine_learning/models/lightning_base.py +16 -11
- smftools/machine_learning/models/mlp.py +5 -1
- smftools/machine_learning/models/positional.py +7 -2
- smftools/machine_learning/models/rnn.py +5 -1
- smftools/machine_learning/models/sklearn_models.py +14 -9
- smftools/machine_learning/models/transformer.py +7 -2
- smftools/machine_learning/models/wrappers.py +6 -2
- smftools/machine_learning/training/__init__.py +2 -0
- smftools/machine_learning/training/train_lightning_model.py +13 -3
- smftools/machine_learning/training/train_sklearn_model.py +2 -0
- smftools/machine_learning/utils/__init__.py +2 -0
- smftools/machine_learning/utils/device.py +5 -1
- smftools/machine_learning/utils/grl.py +5 -1
- smftools/optional_imports.py +31 -0
- smftools/plotting/__init__.py +32 -31
- smftools/plotting/autocorrelation_plotting.py +9 -5
- smftools/plotting/classifiers.py +16 -4
- smftools/plotting/general_plotting.py +6 -3
- smftools/plotting/hmm_plotting.py +12 -2
- smftools/plotting/position_stats.py +15 -7
- smftools/plotting/qc_plotting.py +6 -1
- smftools/preprocessing/__init__.py +35 -37
- smftools/preprocessing/archived/add_read_length_and_mapping_qc.py +2 -0
- smftools/preprocessing/archived/calculate_complexity.py +2 -0
- smftools/preprocessing/archived/mark_duplicates.py +2 -0
- smftools/preprocessing/archived/preprocessing.py +2 -0
- smftools/preprocessing/archived/remove_duplicates.py +2 -0
- smftools/preprocessing/binary_layers_to_ohe.py +2 -1
- smftools/preprocessing/calculate_complexity_II.py +4 -1
- smftools/preprocessing/calculate_pairwise_differences.py +2 -0
- smftools/preprocessing/calculate_pairwise_hamming_distances.py +3 -0
- smftools/preprocessing/calculate_position_Youden.py +9 -2
- smftools/preprocessing/filter_reads_on_length_quality_mapping.py +2 -0
- smftools/preprocessing/filter_reads_on_modification_thresholds.py +2 -0
- smftools/preprocessing/flag_duplicate_reads.py +42 -54
- smftools/preprocessing/make_dirs.py +2 -1
- smftools/preprocessing/min_non_diagonal.py +2 -0
- smftools/preprocessing/recipes.py +2 -0
- smftools/tools/__init__.py +26 -18
- smftools/tools/archived/apply_hmm.py +2 -0
- smftools/tools/archived/classifiers.py +2 -0
- smftools/tools/archived/classify_methylated_features.py +2 -0
- smftools/tools/archived/classify_non_methylated_features.py +2 -0
- smftools/tools/archived/subset_adata_v1.py +2 -0
- smftools/tools/archived/subset_adata_v2.py +2 -0
- smftools/tools/calculate_umap.py +3 -1
- smftools/tools/cluster_adata_on_methylation.py +7 -1
- smftools/tools/position_stats.py +17 -27
- {smftools-0.2.5.dist-info → smftools-0.3.0.dist-info}/METADATA +67 -33
- smftools-0.3.0.dist-info/RECORD +182 -0
- smftools-0.2.5.dist-info/RECORD +0 -181
- {smftools-0.2.5.dist-info → smftools-0.3.0.dist-info}/WHEEL +0 -0
- {smftools-0.2.5.dist-info → smftools-0.3.0.dist-info}/entry_points.txt +0 -0
- {smftools-0.2.5.dist-info → smftools-0.3.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""Utilities for optional dependency handling."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from importlib import import_module
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def require(package: str, *, extra: str, purpose: str | None = None) -> Any:
|
|
10
|
+
"""Import an optional dependency with a helpful error message.
|
|
11
|
+
|
|
12
|
+
Args:
|
|
13
|
+
package: Importable module name (e.g., "torch", "scanpy").
|
|
14
|
+
extra: Extra name users should install (e.g., "ml", "omics").
|
|
15
|
+
purpose: Optional context describing the feature needing the dependency.
|
|
16
|
+
|
|
17
|
+
Returns:
|
|
18
|
+
The imported module.
|
|
19
|
+
|
|
20
|
+
Raises:
|
|
21
|
+
ModuleNotFoundError: If the package is not installed.
|
|
22
|
+
"""
|
|
23
|
+
try:
|
|
24
|
+
return import_module(package)
|
|
25
|
+
except ModuleNotFoundError as exc: # pragma: no cover - depends on env
|
|
26
|
+
reason = f" for {purpose}" if purpose else ""
|
|
27
|
+
message = (
|
|
28
|
+
f"Optional dependency '{package}' is required{reason}. "
|
|
29
|
+
f"Install it with: pip install 'smftools[{extra}]'"
|
|
30
|
+
)
|
|
31
|
+
raise ModuleNotFoundError(message) from exc
|
smftools/plotting/__init__.py
CHANGED
|
@@ -1,32 +1,33 @@
|
|
|
1
|
-
from
|
|
2
|
-
from .classifiers import (
|
|
3
|
-
plot_feature_importances_or_saliency,
|
|
4
|
-
plot_model_curves_from_adata,
|
|
5
|
-
plot_model_curves_from_adata_with_frequency_grid,
|
|
6
|
-
plot_model_performance,
|
|
7
|
-
)
|
|
8
|
-
from .general_plotting import (
|
|
9
|
-
combined_hmm_raw_clustermap,
|
|
10
|
-
combined_raw_clustermap,
|
|
11
|
-
plot_hmm_layers_rolling_by_sample_ref,
|
|
12
|
-
)
|
|
13
|
-
from .hmm_plotting import *
|
|
14
|
-
from .position_stats import (
|
|
15
|
-
plot_bar_relative_risk,
|
|
16
|
-
plot_positionwise_matrix,
|
|
17
|
-
plot_positionwise_matrix_grid,
|
|
18
|
-
plot_volcano_relative_risk,
|
|
19
|
-
)
|
|
20
|
-
from .qc_plotting import *
|
|
1
|
+
from __future__ import annotations
|
|
21
2
|
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
"
|
|
26
|
-
"
|
|
27
|
-
"
|
|
28
|
-
"
|
|
29
|
-
"
|
|
30
|
-
"
|
|
31
|
-
"
|
|
32
|
-
|
|
3
|
+
from importlib import import_module
|
|
4
|
+
|
|
5
|
+
_LAZY_ATTRS = {
|
|
6
|
+
"combined_hmm_raw_clustermap": "smftools.plotting.general_plotting",
|
|
7
|
+
"combined_raw_clustermap": "smftools.plotting.general_plotting",
|
|
8
|
+
"plot_hmm_layers_rolling_by_sample_ref": "smftools.plotting.general_plotting",
|
|
9
|
+
"plot_bar_relative_risk": "smftools.plotting.position_stats",
|
|
10
|
+
"plot_positionwise_matrix": "smftools.plotting.position_stats",
|
|
11
|
+
"plot_positionwise_matrix_grid": "smftools.plotting.position_stats",
|
|
12
|
+
"plot_volcano_relative_risk": "smftools.plotting.position_stats",
|
|
13
|
+
"plot_feature_importances_or_saliency": "smftools.plotting.classifiers",
|
|
14
|
+
"plot_model_curves_from_adata": "smftools.plotting.classifiers",
|
|
15
|
+
"plot_model_curves_from_adata_with_frequency_grid": "smftools.plotting.classifiers",
|
|
16
|
+
"plot_model_performance": "smftools.plotting.classifiers",
|
|
17
|
+
"plot_read_qc_histograms": "smftools.plotting.qc_plotting",
|
|
18
|
+
"plot_rolling_grid": "smftools.plotting.autocorrelation_plotting",
|
|
19
|
+
"plot_spatial_autocorr_grid": "smftools.plotting.autocorrelation_plotting",
|
|
20
|
+
"plot_hmm_size_contours": "smftools.plotting.hmm_plotting",
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def __getattr__(name: str):
|
|
25
|
+
if name in _LAZY_ATTRS:
|
|
26
|
+
module = import_module(_LAZY_ATTRS[name])
|
|
27
|
+
attr = getattr(module, name)
|
|
28
|
+
globals()[name] = attr
|
|
29
|
+
return attr
|
|
30
|
+
raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
__all__ = list(_LAZY_ATTRS.keys())
|
|
@@ -1,8 +1,12 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
from typing import Optional
|
|
2
4
|
|
|
3
5
|
import numpy as np
|
|
4
6
|
import pandas as pd
|
|
5
7
|
|
|
8
|
+
from smftools.optional_imports import require
|
|
9
|
+
|
|
6
10
|
|
|
7
11
|
def plot_spatial_autocorr_grid(
|
|
8
12
|
adata,
|
|
@@ -35,7 +39,7 @@ def plot_spatial_autocorr_grid(
|
|
|
35
39
|
import os
|
|
36
40
|
import warnings
|
|
37
41
|
|
|
38
|
-
|
|
42
|
+
plt = require("matplotlib.pyplot", extra="plotting", purpose="autocorrelation plots")
|
|
39
43
|
|
|
40
44
|
# Try importing analyzer (used only as fallback)
|
|
41
45
|
try:
|
|
@@ -518,7 +522,7 @@ def plot_rolling_metrics(df, out_png=None, title=None, figsize=(10, 3.5), dpi=16
|
|
|
518
522
|
Plot NRL and SNR vs window center from the dataframe returned by rolling_autocorr_metrics.
|
|
519
523
|
If out_png is None, returns the matplotlib Figure object; otherwise saves PNG and returns path.
|
|
520
524
|
"""
|
|
521
|
-
|
|
525
|
+
plt = require("matplotlib.pyplot", extra="plotting", purpose="autocorrelation plots")
|
|
522
526
|
|
|
523
527
|
# sort by center
|
|
524
528
|
df2 = df.sort_values("center")
|
|
@@ -543,12 +547,12 @@ def plot_rolling_metrics(df, out_png=None, title=None, figsize=(10, 3.5), dpi=16
|
|
|
543
547
|
if out_png:
|
|
544
548
|
fig.savefig(out_png, bbox_inches="tight")
|
|
545
549
|
if not show:
|
|
546
|
-
|
|
550
|
+
matplotlib = require("matplotlib", extra="plotting", purpose="autocorrelation plots")
|
|
547
551
|
|
|
548
552
|
matplotlib.pyplot.close(fig)
|
|
549
553
|
return out_png
|
|
550
554
|
if not show:
|
|
551
|
-
|
|
555
|
+
matplotlib = require("matplotlib", extra="plotting", purpose="autocorrelation plots")
|
|
552
556
|
|
|
553
557
|
matplotlib.pyplot.close(fig)
|
|
554
558
|
return fig
|
|
@@ -600,7 +604,7 @@ def plot_rolling_grid(
|
|
|
600
604
|
"""
|
|
601
605
|
import os
|
|
602
606
|
|
|
603
|
-
|
|
607
|
+
plt = require("matplotlib.pyplot", extra="plotting", purpose="autocorrelation plots")
|
|
604
608
|
|
|
605
609
|
if per_metric_ylim is None:
|
|
606
610
|
per_metric_ylim = {}
|
smftools/plotting/classifiers.py
CHANGED
|
@@ -1,8 +1,13 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import os
|
|
2
4
|
|
|
3
|
-
import matplotlib.pyplot as plt
|
|
4
5
|
import numpy as np
|
|
5
|
-
|
|
6
|
+
|
|
7
|
+
from smftools.optional_imports import require
|
|
8
|
+
|
|
9
|
+
plt = require("matplotlib.pyplot", extra="plotting", purpose="model plots")
|
|
10
|
+
torch = require("torch", extra="ml-base", purpose="model saliency plots")
|
|
6
11
|
|
|
7
12
|
|
|
8
13
|
def plot_model_performance(metrics, save_path=None):
|
|
@@ -260,7 +265,10 @@ def plot_model_curves_from_adata(
|
|
|
260
265
|
ylim_roc: Y-axis limits for ROC curve.
|
|
261
266
|
ylim_pr: Y-axis limits for PR curve.
|
|
262
267
|
"""
|
|
263
|
-
|
|
268
|
+
sklearn_metrics = require("sklearn.metrics", extra="ml-base", purpose="model curves")
|
|
269
|
+
auc = sklearn_metrics.auc
|
|
270
|
+
precision_recall_curve = sklearn_metrics.precision_recall_curve
|
|
271
|
+
roc_curve = sklearn_metrics.roc_curve
|
|
264
272
|
|
|
265
273
|
if omit_training:
|
|
266
274
|
subset = adata[~adata.obs["used_for_training"].astype(bool)]
|
|
@@ -349,7 +357,11 @@ def plot_model_curves_from_adata_with_frequency_grid(
|
|
|
349
357
|
import os
|
|
350
358
|
|
|
351
359
|
import numpy as np
|
|
352
|
-
|
|
360
|
+
|
|
361
|
+
sklearn_metrics = require("sklearn.metrics", extra="ml-base", purpose="model curves")
|
|
362
|
+
auc = sklearn_metrics.auc
|
|
363
|
+
precision_recall_curve = sklearn_metrics.precision_recall_curve
|
|
364
|
+
roc_curve = sklearn_metrics.roc_curve
|
|
353
365
|
|
|
354
366
|
if f1_levels is None:
|
|
355
367
|
f1_levels = np.linspace(0.2, 0.9, 8)
|
|
@@ -5,12 +5,15 @@ import os
|
|
|
5
5
|
from pathlib import Path
|
|
6
6
|
from typing import Any, Dict, List, Mapping, Optional, Sequence, Tuple
|
|
7
7
|
|
|
8
|
-
import matplotlib.gridspec as gridspec
|
|
9
|
-
import matplotlib.pyplot as plt
|
|
10
8
|
import numpy as np
|
|
11
9
|
import pandas as pd
|
|
12
10
|
import scipy.cluster.hierarchy as sch
|
|
13
|
-
|
|
11
|
+
|
|
12
|
+
from smftools.optional_imports import require
|
|
13
|
+
|
|
14
|
+
gridspec = require("matplotlib.gridspec", extra="plotting", purpose="heatmap plotting")
|
|
15
|
+
plt = require("matplotlib.pyplot", extra="plotting", purpose="plot rendering")
|
|
16
|
+
sns = require("seaborn", extra="plotting", purpose="plot styling")
|
|
14
17
|
|
|
15
18
|
|
|
16
19
|
def _fixed_tick_positions(n_positions: int, n_ticks: int) -> np.ndarray:
|
|
@@ -1,9 +1,19 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import math
|
|
2
4
|
from typing import Optional, Tuple, Union
|
|
3
5
|
|
|
4
|
-
import matplotlib.pyplot as plt
|
|
5
6
|
import numpy as np
|
|
6
|
-
|
|
7
|
+
|
|
8
|
+
from smftools.optional_imports import require
|
|
9
|
+
|
|
10
|
+
plt = require("matplotlib.pyplot", extra="plotting", purpose="HMM plots")
|
|
11
|
+
pdf_backend = require(
|
|
12
|
+
"matplotlib.backends.backend_pdf",
|
|
13
|
+
extra="plotting",
|
|
14
|
+
purpose="PDF output",
|
|
15
|
+
)
|
|
16
|
+
PdfPages = pdf_backend.PdfPages
|
|
7
17
|
|
|
8
18
|
|
|
9
19
|
def plot_hmm_size_contours(
|
|
@@ -1,3 +1,8 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from smftools.optional_imports import require
|
|
4
|
+
|
|
5
|
+
|
|
1
6
|
def plot_volcano_relative_risk(
|
|
2
7
|
results_dict,
|
|
3
8
|
save_path=None,
|
|
@@ -22,7 +27,7 @@ def plot_volcano_relative_risk(
|
|
|
22
27
|
"""
|
|
23
28
|
import os
|
|
24
29
|
|
|
25
|
-
|
|
30
|
+
plt = require("matplotlib.pyplot", extra="plotting", purpose="relative risk plots")
|
|
26
31
|
|
|
27
32
|
for ref, group_results in results_dict.items():
|
|
28
33
|
for group_label, (results_df, _) in group_results.items():
|
|
@@ -124,7 +129,7 @@ def plot_bar_relative_risk(
|
|
|
124
129
|
"""
|
|
125
130
|
import os
|
|
126
131
|
|
|
127
|
-
|
|
132
|
+
plt = require("matplotlib.pyplot", extra="plotting", purpose="relative risk plots")
|
|
128
133
|
|
|
129
134
|
for ref, group_data in results_dict.items():
|
|
130
135
|
for group_label, (df, _) in group_data.items():
|
|
@@ -229,10 +234,11 @@ def plot_positionwise_matrix(
|
|
|
229
234
|
"""
|
|
230
235
|
import os
|
|
231
236
|
|
|
232
|
-
import matplotlib.pyplot as plt
|
|
233
237
|
import numpy as np
|
|
234
238
|
import pandas as pd
|
|
235
|
-
|
|
239
|
+
|
|
240
|
+
plt = require("matplotlib.pyplot", extra="plotting", purpose="position stats plots")
|
|
241
|
+
sns = require("seaborn", extra="plotting", purpose="position stats plots")
|
|
236
242
|
|
|
237
243
|
def find_closest_index(index, target):
|
|
238
244
|
"""Find the index value closest to a target value."""
|
|
@@ -408,12 +414,14 @@ def plot_positionwise_matrix_grid(
|
|
|
408
414
|
"""
|
|
409
415
|
import os
|
|
410
416
|
|
|
411
|
-
import matplotlib.pyplot as plt
|
|
412
417
|
import numpy as np
|
|
413
418
|
import pandas as pd
|
|
414
|
-
import seaborn as sns
|
|
415
419
|
from joblib import Parallel, delayed
|
|
416
|
-
|
|
420
|
+
|
|
421
|
+
plt = require("matplotlib.pyplot", extra="plotting", purpose="position stats plots")
|
|
422
|
+
sns = require("seaborn", extra="plotting", purpose="position stats plots")
|
|
423
|
+
grid_spec = require("matplotlib.gridspec", extra="plotting", purpose="position stats plots")
|
|
424
|
+
GridSpec = grid_spec.GridSpec
|
|
417
425
|
|
|
418
426
|
matrices = adata.uns[key]
|
|
419
427
|
group_labels = list(matrices.keys())
|
smftools/plotting/qc_plotting.py
CHANGED
|
@@ -1,9 +1,14 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import os
|
|
2
4
|
|
|
3
|
-
import matplotlib.pyplot as plt
|
|
4
5
|
import numpy as np
|
|
5
6
|
import pandas as pd
|
|
6
7
|
|
|
8
|
+
from smftools.optional_imports import require
|
|
9
|
+
|
|
10
|
+
plt = require("matplotlib.pyplot", extra="plotting", purpose="QC plots")
|
|
11
|
+
|
|
7
12
|
|
|
8
13
|
def plot_read_qc_histograms(
|
|
9
14
|
adata,
|
|
@@ -1,38 +1,36 @@
|
|
|
1
|
-
from
|
|
2
|
-
from .append_binary_layer_by_base_context import append_binary_layer_by_base_context
|
|
3
|
-
from .binarize import binarize_adata
|
|
4
|
-
from .binarize_on_Youden import binarize_on_Youden
|
|
5
|
-
from .calculate_complexity_II import calculate_complexity_II
|
|
6
|
-
from .calculate_coverage import calculate_coverage
|
|
7
|
-
from .calculate_position_Youden import calculate_position_Youden
|
|
8
|
-
from .calculate_read_length_stats import calculate_read_length_stats
|
|
9
|
-
from .calculate_read_modification_stats import calculate_read_modification_stats
|
|
10
|
-
from .clean_NaN import clean_NaN
|
|
11
|
-
from .filter_adata_by_nan_proportion import filter_adata_by_nan_proportion
|
|
12
|
-
from .filter_reads_on_length_quality_mapping import filter_reads_on_length_quality_mapping
|
|
13
|
-
from .filter_reads_on_modification_thresholds import filter_reads_on_modification_thresholds
|
|
14
|
-
from .flag_duplicate_reads import flag_duplicate_reads
|
|
15
|
-
from .invert_adata import invert_adata
|
|
16
|
-
from .load_sample_sheet import load_sample_sheet
|
|
17
|
-
from .reindex_references_adata import reindex_references_adata
|
|
18
|
-
from .subsample_adata import subsample_adata
|
|
1
|
+
from __future__ import annotations
|
|
19
2
|
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
"
|
|
24
|
-
"
|
|
25
|
-
"
|
|
26
|
-
"
|
|
27
|
-
"
|
|
28
|
-
"
|
|
29
|
-
"
|
|
30
|
-
"
|
|
31
|
-
"
|
|
32
|
-
"
|
|
33
|
-
"
|
|
34
|
-
"
|
|
35
|
-
"
|
|
36
|
-
"flag_duplicate_reads",
|
|
37
|
-
"
|
|
38
|
-
|
|
3
|
+
from importlib import import_module
|
|
4
|
+
|
|
5
|
+
_LAZY_ATTRS = {
|
|
6
|
+
"append_base_context": "smftools.preprocessing.append_base_context",
|
|
7
|
+
"append_binary_layer_by_base_context": "smftools.preprocessing.append_binary_layer_by_base_context",
|
|
8
|
+
"binarize_adata": "smftools.preprocessing.binarize",
|
|
9
|
+
"binarize_on_Youden": "smftools.preprocessing.binarize_on_Youden",
|
|
10
|
+
"calculate_complexity_II": "smftools.preprocessing.calculate_complexity_II",
|
|
11
|
+
"calculate_coverage": "smftools.preprocessing.calculate_coverage",
|
|
12
|
+
"calculate_position_Youden": "smftools.preprocessing.calculate_position_Youden",
|
|
13
|
+
"calculate_read_length_stats": "smftools.preprocessing.calculate_read_length_stats",
|
|
14
|
+
"calculate_read_modification_stats": "smftools.preprocessing.calculate_read_modification_stats",
|
|
15
|
+
"clean_NaN": "smftools.preprocessing.clean_NaN",
|
|
16
|
+
"filter_adata_by_nan_proportion": "smftools.preprocessing.filter_adata_by_nan_proportion",
|
|
17
|
+
"filter_reads_on_length_quality_mapping": "smftools.preprocessing.filter_reads_on_length_quality_mapping",
|
|
18
|
+
"filter_reads_on_modification_thresholds": "smftools.preprocessing.filter_reads_on_modification_thresholds",
|
|
19
|
+
"flag_duplicate_reads": "smftools.preprocessing.flag_duplicate_reads",
|
|
20
|
+
"invert_adata": "smftools.preprocessing.invert_adata",
|
|
21
|
+
"load_sample_sheet": "smftools.preprocessing.load_sample_sheet",
|
|
22
|
+
"reindex_references_adata": "smftools.preprocessing.reindex_references_adata",
|
|
23
|
+
"subsample_adata": "smftools.preprocessing.subsample_adata",
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def __getattr__(name: str):
|
|
28
|
+
if name in _LAZY_ATTRS:
|
|
29
|
+
module = import_module(_LAZY_ATTRS[name])
|
|
30
|
+
attr = getattr(module, name)
|
|
31
|
+
globals()[name] = attr
|
|
32
|
+
return attr
|
|
33
|
+
raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
__all__ = list(_LAZY_ATTRS.keys())
|
|
@@ -3,6 +3,8 @@ from __future__ import annotations
|
|
|
3
3
|
from pathlib import Path
|
|
4
4
|
from typing import TYPE_CHECKING, Optional
|
|
5
5
|
|
|
6
|
+
from smftools.optional_imports import require
|
|
7
|
+
|
|
6
8
|
if TYPE_CHECKING:
|
|
7
9
|
import anndata as ad
|
|
8
10
|
|
|
@@ -46,11 +48,12 @@ def calculate_complexity_II(
|
|
|
46
48
|
"""
|
|
47
49
|
import os
|
|
48
50
|
|
|
49
|
-
import matplotlib.pyplot as plt
|
|
50
51
|
import numpy as np
|
|
51
52
|
import pandas as pd
|
|
52
53
|
from scipy.optimize import curve_fit
|
|
53
54
|
|
|
55
|
+
plt = require("matplotlib.pyplot", extra="plotting", purpose="complexity plots")
|
|
56
|
+
|
|
54
57
|
# early exits
|
|
55
58
|
already = bool(adata.uns.get(uns_flag, False))
|
|
56
59
|
if already and not force_redo:
|
|
@@ -6,6 +6,7 @@ from pathlib import Path
|
|
|
6
6
|
from typing import TYPE_CHECKING
|
|
7
7
|
|
|
8
8
|
from smftools.logging_utils import get_logger
|
|
9
|
+
from smftools.optional_imports import require
|
|
9
10
|
|
|
10
11
|
if TYPE_CHECKING:
|
|
11
12
|
import anndata as ad
|
|
@@ -40,9 +41,15 @@ def calculate_position_Youden(
|
|
|
40
41
|
save: Whether to save ROC plots to disk.
|
|
41
42
|
output_directory: Output directory for ROC plots.
|
|
42
43
|
"""
|
|
43
|
-
import matplotlib.pyplot as plt
|
|
44
44
|
import numpy as np
|
|
45
|
-
|
|
45
|
+
|
|
46
|
+
plt = require("matplotlib.pyplot", extra="plotting", purpose="Youden ROC plots")
|
|
47
|
+
sklearn_metrics = require(
|
|
48
|
+
"sklearn.metrics",
|
|
49
|
+
extra="ml-base",
|
|
50
|
+
purpose="Youden ROC curve calculation",
|
|
51
|
+
)
|
|
52
|
+
roc_curve = sklearn_metrics.roc_curve
|
|
46
53
|
|
|
47
54
|
control_samples = [positive_control_sample, negative_control_sample]
|
|
48
55
|
references = adata.obs[ref_column].cat.categories
|