smftools-0.2.5-py3-none-any.whl → smftools-0.3.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- smftools/__init__.py +39 -7
- smftools/_settings.py +2 -0
- smftools/_version.py +3 -1
- smftools/cli/__init__.py +1 -0
- smftools/cli/archived/cli_flows.py +2 -0
- smftools/cli/helpers.py +34 -6
- smftools/cli/hmm_adata.py +239 -33
- smftools/cli/latent_adata.py +318 -0
- smftools/cli/load_adata.py +167 -131
- smftools/cli/preprocess_adata.py +180 -53
- smftools/cli/spatial_adata.py +152 -100
- smftools/cli_entry.py +38 -1
- smftools/config/__init__.py +2 -0
- smftools/config/conversion.yaml +11 -1
- smftools/config/default.yaml +42 -2
- smftools/config/experiment_config.py +59 -1
- smftools/constants.py +65 -0
- smftools/datasets/__init__.py +2 -0
- smftools/hmm/HMM.py +97 -3
- smftools/hmm/__init__.py +24 -13
- smftools/hmm/archived/apply_hmm_batched.py +2 -0
- smftools/hmm/archived/calculate_distances.py +2 -0
- smftools/hmm/archived/call_hmm_peaks.py +2 -0
- smftools/hmm/archived/train_hmm.py +2 -0
- smftools/hmm/call_hmm_peaks.py +5 -2
- smftools/hmm/display_hmm.py +4 -1
- smftools/hmm/hmm_readwrite.py +7 -2
- smftools/hmm/nucleosome_hmm_refinement.py +2 -0
- smftools/informatics/__init__.py +59 -34
- smftools/informatics/archived/bam_conversion.py +2 -0
- smftools/informatics/archived/bam_direct.py +2 -0
- smftools/informatics/archived/basecall_pod5s.py +2 -0
- smftools/informatics/archived/basecalls_to_adata.py +2 -0
- smftools/informatics/archived/conversion_smf.py +2 -0
- smftools/informatics/archived/deaminase_smf.py +1 -0
- smftools/informatics/archived/direct_smf.py +2 -0
- smftools/informatics/archived/fast5_to_pod5.py +2 -0
- smftools/informatics/archived/helpers/archived/__init__.py +2 -0
- smftools/informatics/archived/helpers/archived/align_and_sort_BAM.py +2 -0
- smftools/informatics/archived/helpers/archived/aligned_BAM_to_bed.py +2 -0
- smftools/informatics/archived/helpers/archived/bed_to_bigwig.py +2 -0
- smftools/informatics/archived/helpers/archived/canoncall.py +2 -0
- smftools/informatics/archived/helpers/archived/converted_BAM_to_adata.py +2 -0
- smftools/informatics/archived/helpers/archived/count_aligned_reads.py +2 -0
- smftools/informatics/archived/helpers/archived/demux_and_index_BAM.py +2 -0
- smftools/informatics/archived/helpers/archived/extract_base_identities.py +2 -0
- smftools/informatics/archived/helpers/archived/extract_mods.py +2 -0
- smftools/informatics/archived/helpers/archived/extract_read_features_from_bam.py +2 -0
- smftools/informatics/archived/helpers/archived/extract_read_lengths_from_bed.py +2 -0
- smftools/informatics/archived/helpers/archived/extract_readnames_from_BAM.py +2 -0
- smftools/informatics/archived/helpers/archived/find_conversion_sites.py +2 -0
- smftools/informatics/archived/helpers/archived/generate_converted_FASTA.py +2 -0
- smftools/informatics/archived/helpers/archived/get_chromosome_lengths.py +2 -0
- smftools/informatics/archived/helpers/archived/get_native_references.py +2 -0
- smftools/informatics/archived/helpers/archived/index_fasta.py +2 -0
- smftools/informatics/archived/helpers/archived/informatics.py +2 -0
- smftools/informatics/archived/helpers/archived/load_adata.py +2 -0
- smftools/informatics/archived/helpers/archived/make_modbed.py +2 -0
- smftools/informatics/archived/helpers/archived/modQC.py +2 -0
- smftools/informatics/archived/helpers/archived/modcall.py +2 -0
- smftools/informatics/archived/helpers/archived/ohe_batching.py +2 -0
- smftools/informatics/archived/helpers/archived/ohe_layers_decode.py +2 -0
- smftools/informatics/archived/helpers/archived/one_hot_decode.py +2 -0
- smftools/informatics/archived/helpers/archived/one_hot_encode.py +2 -0
- smftools/informatics/archived/helpers/archived/plot_bed_histograms.py +2 -0
- smftools/informatics/archived/helpers/archived/separate_bam_by_bc.py +2 -0
- smftools/informatics/archived/helpers/archived/split_and_index_BAM.py +2 -0
- smftools/informatics/archived/print_bam_query_seq.py +2 -0
- smftools/informatics/archived/subsample_fasta_from_bed.py +2 -0
- smftools/informatics/archived/subsample_pod5.py +2 -0
- smftools/informatics/bam_functions.py +1093 -176
- smftools/informatics/basecalling.py +2 -0
- smftools/informatics/bed_functions.py +271 -61
- smftools/informatics/binarize_converted_base_identities.py +3 -0
- smftools/informatics/complement_base_list.py +2 -0
- smftools/informatics/converted_BAM_to_adata.py +641 -176
- smftools/informatics/fasta_functions.py +94 -10
- smftools/informatics/h5ad_functions.py +123 -4
- smftools/informatics/modkit_extract_to_adata.py +1019 -431
- smftools/informatics/modkit_functions.py +2 -0
- smftools/informatics/ohe.py +2 -0
- smftools/informatics/pod5_functions.py +3 -2
- smftools/informatics/sequence_encoding.py +72 -0
- smftools/logging_utils.py +21 -2
- smftools/machine_learning/__init__.py +22 -6
- smftools/machine_learning/data/__init__.py +2 -0
- smftools/machine_learning/data/anndata_data_module.py +18 -4
- smftools/machine_learning/data/preprocessing.py +2 -0
- smftools/machine_learning/evaluation/__init__.py +2 -0
- smftools/machine_learning/evaluation/eval_utils.py +2 -0
- smftools/machine_learning/evaluation/evaluators.py +14 -9
- smftools/machine_learning/inference/__init__.py +2 -0
- smftools/machine_learning/inference/inference_utils.py +2 -0
- smftools/machine_learning/inference/lightning_inference.py +6 -1
- smftools/machine_learning/inference/sklearn_inference.py +2 -0
- smftools/machine_learning/inference/sliding_window_inference.py +2 -0
- smftools/machine_learning/models/__init__.py +2 -0
- smftools/machine_learning/models/base.py +7 -2
- smftools/machine_learning/models/cnn.py +7 -2
- smftools/machine_learning/models/lightning_base.py +16 -11
- smftools/machine_learning/models/mlp.py +5 -1
- smftools/machine_learning/models/positional.py +7 -2
- smftools/machine_learning/models/rnn.py +5 -1
- smftools/machine_learning/models/sklearn_models.py +14 -9
- smftools/machine_learning/models/transformer.py +7 -2
- smftools/machine_learning/models/wrappers.py +6 -2
- smftools/machine_learning/training/__init__.py +2 -0
- smftools/machine_learning/training/train_lightning_model.py +13 -3
- smftools/machine_learning/training/train_sklearn_model.py +2 -0
- smftools/machine_learning/utils/__init__.py +2 -0
- smftools/machine_learning/utils/device.py +5 -1
- smftools/machine_learning/utils/grl.py +5 -1
- smftools/metadata.py +1 -1
- smftools/optional_imports.py +31 -0
- smftools/plotting/__init__.py +41 -31
- smftools/plotting/autocorrelation_plotting.py +9 -5
- smftools/plotting/classifiers.py +16 -4
- smftools/plotting/general_plotting.py +2415 -629
- smftools/plotting/hmm_plotting.py +97 -9
- smftools/plotting/position_stats.py +15 -7
- smftools/plotting/qc_plotting.py +6 -1
- smftools/preprocessing/__init__.py +36 -37
- smftools/preprocessing/append_base_context.py +17 -17
- smftools/preprocessing/append_mismatch_frequency_sites.py +158 -0
- smftools/preprocessing/archived/add_read_length_and_mapping_qc.py +2 -0
- smftools/preprocessing/archived/calculate_complexity.py +2 -0
- smftools/preprocessing/archived/mark_duplicates.py +2 -0
- smftools/preprocessing/archived/preprocessing.py +2 -0
- smftools/preprocessing/archived/remove_duplicates.py +2 -0
- smftools/preprocessing/binary_layers_to_ohe.py +2 -1
- smftools/preprocessing/calculate_complexity_II.py +4 -1
- smftools/preprocessing/calculate_consensus.py +1 -1
- smftools/preprocessing/calculate_pairwise_differences.py +2 -0
- smftools/preprocessing/calculate_pairwise_hamming_distances.py +3 -0
- smftools/preprocessing/calculate_position_Youden.py +9 -2
- smftools/preprocessing/calculate_read_modification_stats.py +6 -1
- smftools/preprocessing/filter_reads_on_length_quality_mapping.py +2 -0
- smftools/preprocessing/filter_reads_on_modification_thresholds.py +2 -0
- smftools/preprocessing/flag_duplicate_reads.py +42 -54
- smftools/preprocessing/make_dirs.py +2 -1
- smftools/preprocessing/min_non_diagonal.py +2 -0
- smftools/preprocessing/recipes.py +2 -0
- smftools/readwrite.py +53 -17
- smftools/schema/anndata_schema_v1.yaml +15 -1
- smftools/tools/__init__.py +30 -18
- smftools/tools/archived/apply_hmm.py +2 -0
- smftools/tools/archived/classifiers.py +2 -0
- smftools/tools/archived/classify_methylated_features.py +2 -0
- smftools/tools/archived/classify_non_methylated_features.py +2 -0
- smftools/tools/archived/subset_adata_v1.py +2 -0
- smftools/tools/archived/subset_adata_v2.py +2 -0
- smftools/tools/calculate_leiden.py +57 -0
- smftools/tools/calculate_nmf.py +119 -0
- smftools/tools/calculate_umap.py +93 -8
- smftools/tools/cluster_adata_on_methylation.py +7 -1
- smftools/tools/position_stats.py +17 -27
- smftools/tools/rolling_nn_distance.py +235 -0
- smftools/tools/tensor_factorization.py +169 -0
- {smftools-0.2.5.dist-info → smftools-0.3.1.dist-info}/METADATA +69 -33
- smftools-0.3.1.dist-info/RECORD +189 -0
- smftools-0.2.5.dist-info/RECORD +0 -181
- {smftools-0.2.5.dist-info → smftools-0.3.1.dist-info}/WHEEL +0 -0
- {smftools-0.2.5.dist-info → smftools-0.3.1.dist-info}/entry_points.txt +0 -0
- {smftools-0.2.5.dist-info → smftools-0.3.1.dist-info}/licenses/LICENSE +0 -0
smftools/plotting/__init__.py
CHANGED
|
@@ -1,32 +1,42 @@
|
|
|
1
|
-
from
|
|
2
|
-
from .classifiers import (
|
|
3
|
-
plot_feature_importances_or_saliency,
|
|
4
|
-
plot_model_curves_from_adata,
|
|
5
|
-
plot_model_curves_from_adata_with_frequency_grid,
|
|
6
|
-
plot_model_performance,
|
|
7
|
-
)
|
|
8
|
-
from .general_plotting import (
|
|
9
|
-
combined_hmm_raw_clustermap,
|
|
10
|
-
combined_raw_clustermap,
|
|
11
|
-
plot_hmm_layers_rolling_by_sample_ref,
|
|
12
|
-
)
|
|
13
|
-
from .hmm_plotting import *
|
|
14
|
-
from .position_stats import (
|
|
15
|
-
plot_bar_relative_risk,
|
|
16
|
-
plot_positionwise_matrix,
|
|
17
|
-
plot_positionwise_matrix_grid,
|
|
18
|
-
plot_volcano_relative_risk,
|
|
19
|
-
)
|
|
20
|
-
from .qc_plotting import *
|
|
1
|
+
from __future__ import annotations
|
|
21
2
|
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
"
|
|
26
|
-
"
|
|
27
|
-
"
|
|
28
|
-
"
|
|
29
|
-
"
|
|
30
|
-
"
|
|
31
|
-
"
|
|
32
|
-
|
|
3
|
+
from importlib import import_module
|
|
4
|
+
|
|
5
|
+
_LAZY_ATTRS = {
|
|
6
|
+
"combined_hmm_length_clustermap": "smftools.plotting.general_plotting",
|
|
7
|
+
"combined_hmm_raw_clustermap": "smftools.plotting.general_plotting",
|
|
8
|
+
"combined_raw_clustermap": "smftools.plotting.general_plotting",
|
|
9
|
+
"plot_rolling_nn_and_layer": "smftools.plotting.general_plotting",
|
|
10
|
+
"plot_hmm_layers_rolling_by_sample_ref": "smftools.plotting.general_plotting",
|
|
11
|
+
"plot_nmf_components": "smftools.plotting.general_plotting",
|
|
12
|
+
"plot_cp_sequence_components": "smftools.plotting.general_plotting",
|
|
13
|
+
"plot_embedding": "smftools.plotting.general_plotting",
|
|
14
|
+
"plot_read_span_quality_clustermaps": "smftools.plotting.general_plotting",
|
|
15
|
+
"plot_pca": "smftools.plotting.general_plotting",
|
|
16
|
+
"plot_sequence_integer_encoding_clustermaps": "smftools.plotting.general_plotting",
|
|
17
|
+
"plot_umap": "smftools.plotting.general_plotting",
|
|
18
|
+
"plot_bar_relative_risk": "smftools.plotting.position_stats",
|
|
19
|
+
"plot_positionwise_matrix": "smftools.plotting.position_stats",
|
|
20
|
+
"plot_positionwise_matrix_grid": "smftools.plotting.position_stats",
|
|
21
|
+
"plot_volcano_relative_risk": "smftools.plotting.position_stats",
|
|
22
|
+
"plot_feature_importances_or_saliency": "smftools.plotting.classifiers",
|
|
23
|
+
"plot_model_curves_from_adata": "smftools.plotting.classifiers",
|
|
24
|
+
"plot_model_curves_from_adata_with_frequency_grid": "smftools.plotting.classifiers",
|
|
25
|
+
"plot_model_performance": "smftools.plotting.classifiers",
|
|
26
|
+
"plot_read_qc_histograms": "smftools.plotting.qc_plotting",
|
|
27
|
+
"plot_rolling_grid": "smftools.plotting.autocorrelation_plotting",
|
|
28
|
+
"plot_spatial_autocorr_grid": "smftools.plotting.autocorrelation_plotting",
|
|
29
|
+
"plot_hmm_size_contours": "smftools.plotting.hmm_plotting",
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def __getattr__(name: str):
|
|
34
|
+
if name in _LAZY_ATTRS:
|
|
35
|
+
module = import_module(_LAZY_ATTRS[name])
|
|
36
|
+
attr = getattr(module, name)
|
|
37
|
+
globals()[name] = attr
|
|
38
|
+
return attr
|
|
39
|
+
raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
__all__ = list(_LAZY_ATTRS.keys())
|
smftools/plotting/autocorrelation_plotting.py
CHANGED
|
@@ -1,8 +1,12 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
from typing import Optional
|
|
2
4
|
|
|
3
5
|
import numpy as np
|
|
4
6
|
import pandas as pd
|
|
5
7
|
|
|
8
|
+
from smftools.optional_imports import require
|
|
9
|
+
|
|
6
10
|
|
|
7
11
|
def plot_spatial_autocorr_grid(
|
|
8
12
|
adata,
|
|
@@ -35,7 +39,7 @@ def plot_spatial_autocorr_grid(
|
|
|
35
39
|
import os
|
|
36
40
|
import warnings
|
|
37
41
|
|
|
38
|
-
|
|
42
|
+
plt = require("matplotlib.pyplot", extra="plotting", purpose="autocorrelation plots")
|
|
39
43
|
|
|
40
44
|
# Try importing analyzer (used only as fallback)
|
|
41
45
|
try:
|
|
@@ -518,7 +522,7 @@ def plot_rolling_metrics(df, out_png=None, title=None, figsize=(10, 3.5), dpi=16
|
|
|
518
522
|
Plot NRL and SNR vs window center from the dataframe returned by rolling_autocorr_metrics.
|
|
519
523
|
If out_png is None, returns the matplotlib Figure object; otherwise saves PNG and returns path.
|
|
520
524
|
"""
|
|
521
|
-
|
|
525
|
+
plt = require("matplotlib.pyplot", extra="plotting", purpose="autocorrelation plots")
|
|
522
526
|
|
|
523
527
|
# sort by center
|
|
524
528
|
df2 = df.sort_values("center")
|
|
@@ -543,12 +547,12 @@ def plot_rolling_metrics(df, out_png=None, title=None, figsize=(10, 3.5), dpi=16
|
|
|
543
547
|
if out_png:
|
|
544
548
|
fig.savefig(out_png, bbox_inches="tight")
|
|
545
549
|
if not show:
|
|
546
|
-
|
|
550
|
+
matplotlib = require("matplotlib", extra="plotting", purpose="autocorrelation plots")
|
|
547
551
|
|
|
548
552
|
matplotlib.pyplot.close(fig)
|
|
549
553
|
return out_png
|
|
550
554
|
if not show:
|
|
551
|
-
|
|
555
|
+
matplotlib = require("matplotlib", extra="plotting", purpose="autocorrelation plots")
|
|
552
556
|
|
|
553
557
|
matplotlib.pyplot.close(fig)
|
|
554
558
|
return fig
|
|
@@ -600,7 +604,7 @@ def plot_rolling_grid(
|
|
|
600
604
|
"""
|
|
601
605
|
import os
|
|
602
606
|
|
|
603
|
-
|
|
607
|
+
plt = require("matplotlib.pyplot", extra="plotting", purpose="autocorrelation plots")
|
|
604
608
|
|
|
605
609
|
if per_metric_ylim is None:
|
|
606
610
|
per_metric_ylim = {}
|
smftools/plotting/classifiers.py
CHANGED
|
@@ -1,8 +1,13 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import os
|
|
2
4
|
|
|
3
|
-
import matplotlib.pyplot as plt
|
|
4
5
|
import numpy as np
|
|
5
|
-
|
|
6
|
+
|
|
7
|
+
from smftools.optional_imports import require
|
|
8
|
+
|
|
9
|
+
plt = require("matplotlib.pyplot", extra="plotting", purpose="model plots")
|
|
10
|
+
torch = require("torch", extra="ml-base", purpose="model saliency plots")
|
|
6
11
|
|
|
7
12
|
|
|
8
13
|
def plot_model_performance(metrics, save_path=None):
|
|
@@ -260,7 +265,10 @@ def plot_model_curves_from_adata(
|
|
|
260
265
|
ylim_roc: Y-axis limits for ROC curve.
|
|
261
266
|
ylim_pr: Y-axis limits for PR curve.
|
|
262
267
|
"""
|
|
263
|
-
|
|
268
|
+
sklearn_metrics = require("sklearn.metrics", extra="ml-base", purpose="model curves")
|
|
269
|
+
auc = sklearn_metrics.auc
|
|
270
|
+
precision_recall_curve = sklearn_metrics.precision_recall_curve
|
|
271
|
+
roc_curve = sklearn_metrics.roc_curve
|
|
264
272
|
|
|
265
273
|
if omit_training:
|
|
266
274
|
subset = adata[~adata.obs["used_for_training"].astype(bool)]
|
|
@@ -349,7 +357,11 @@ def plot_model_curves_from_adata_with_frequency_grid(
|
|
|
349
357
|
import os
|
|
350
358
|
|
|
351
359
|
import numpy as np
|
|
352
|
-
|
|
360
|
+
|
|
361
|
+
sklearn_metrics = require("sklearn.metrics", extra="ml-base", purpose="model curves")
|
|
362
|
+
auc = sklearn_metrics.auc
|
|
363
|
+
precision_recall_curve = sklearn_metrics.precision_recall_curve
|
|
364
|
+
roc_curve = sklearn_metrics.roc_curve
|
|
353
365
|
|
|
354
366
|
if f1_levels is None:
|
|
355
367
|
f1_levels = np.linspace(0.2, 0.9, 8)
|