smftools 0.2.3__py3-none-any.whl → 0.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- smftools/__init__.py +6 -8
- smftools/_settings.py +4 -6
- smftools/_version.py +1 -1
- smftools/cli/helpers.py +54 -0
- smftools/cli/hmm_adata.py +937 -256
- smftools/cli/load_adata.py +448 -268
- smftools/cli/preprocess_adata.py +469 -263
- smftools/cli/spatial_adata.py +536 -319
- smftools/cli_entry.py +97 -182
- smftools/config/__init__.py +1 -1
- smftools/config/conversion.yaml +17 -6
- smftools/config/deaminase.yaml +12 -10
- smftools/config/default.yaml +142 -33
- smftools/config/direct.yaml +11 -3
- smftools/config/discover_input_files.py +19 -5
- smftools/config/experiment_config.py +594 -264
- smftools/constants.py +37 -0
- smftools/datasets/__init__.py +2 -8
- smftools/datasets/datasets.py +32 -18
- smftools/hmm/HMM.py +2128 -1418
- smftools/hmm/__init__.py +2 -9
- smftools/hmm/archived/call_hmm_peaks.py +121 -0
- smftools/hmm/call_hmm_peaks.py +299 -91
- smftools/hmm/display_hmm.py +19 -6
- smftools/hmm/hmm_readwrite.py +13 -4
- smftools/hmm/nucleosome_hmm_refinement.py +102 -14
- smftools/informatics/__init__.py +30 -7
- smftools/informatics/archived/helpers/archived/align_and_sort_BAM.py +14 -1
- smftools/informatics/archived/helpers/archived/bam_qc.py +14 -1
- smftools/informatics/archived/helpers/archived/concatenate_fastqs_to_bam.py +8 -1
- smftools/informatics/archived/helpers/archived/load_adata.py +3 -3
- smftools/informatics/archived/helpers/archived/plot_bed_histograms.py +3 -1
- smftools/informatics/archived/print_bam_query_seq.py +7 -1
- smftools/informatics/bam_functions.py +397 -175
- smftools/informatics/basecalling.py +51 -9
- smftools/informatics/bed_functions.py +90 -57
- smftools/informatics/binarize_converted_base_identities.py +18 -7
- smftools/informatics/complement_base_list.py +7 -6
- smftools/informatics/converted_BAM_to_adata.py +265 -122
- smftools/informatics/fasta_functions.py +161 -83
- smftools/informatics/h5ad_functions.py +196 -30
- smftools/informatics/modkit_extract_to_adata.py +609 -270
- smftools/informatics/modkit_functions.py +85 -44
- smftools/informatics/ohe.py +44 -21
- smftools/informatics/pod5_functions.py +112 -73
- smftools/informatics/run_multiqc.py +20 -14
- smftools/logging_utils.py +51 -0
- smftools/machine_learning/__init__.py +2 -7
- smftools/machine_learning/data/anndata_data_module.py +143 -50
- smftools/machine_learning/data/preprocessing.py +2 -1
- smftools/machine_learning/evaluation/__init__.py +1 -1
- smftools/machine_learning/evaluation/eval_utils.py +11 -14
- smftools/machine_learning/evaluation/evaluators.py +46 -33
- smftools/machine_learning/inference/__init__.py +1 -1
- smftools/machine_learning/inference/inference_utils.py +7 -4
- smftools/machine_learning/inference/lightning_inference.py +9 -13
- smftools/machine_learning/inference/sklearn_inference.py +6 -8
- smftools/machine_learning/inference/sliding_window_inference.py +35 -25
- smftools/machine_learning/models/__init__.py +10 -5
- smftools/machine_learning/models/base.py +28 -42
- smftools/machine_learning/models/cnn.py +15 -11
- smftools/machine_learning/models/lightning_base.py +71 -40
- smftools/machine_learning/models/mlp.py +13 -4
- smftools/machine_learning/models/positional.py +3 -2
- smftools/machine_learning/models/rnn.py +3 -2
- smftools/machine_learning/models/sklearn_models.py +39 -22
- smftools/machine_learning/models/transformer.py +68 -53
- smftools/machine_learning/models/wrappers.py +2 -1
- smftools/machine_learning/training/__init__.py +2 -2
- smftools/machine_learning/training/train_lightning_model.py +29 -20
- smftools/machine_learning/training/train_sklearn_model.py +9 -15
- smftools/machine_learning/utils/__init__.py +1 -1
- smftools/machine_learning/utils/device.py +7 -4
- smftools/machine_learning/utils/grl.py +3 -1
- smftools/metadata.py +443 -0
- smftools/plotting/__init__.py +19 -5
- smftools/plotting/autocorrelation_plotting.py +145 -44
- smftools/plotting/classifiers.py +162 -72
- smftools/plotting/general_plotting.py +422 -197
- smftools/plotting/hmm_plotting.py +42 -13
- smftools/plotting/position_stats.py +147 -87
- smftools/plotting/qc_plotting.py +20 -12
- smftools/preprocessing/__init__.py +10 -12
- smftools/preprocessing/append_base_context.py +115 -80
- smftools/preprocessing/append_binary_layer_by_base_context.py +77 -39
- smftools/preprocessing/{calculate_complexity.py → archived/calculate_complexity.py} +3 -1
- smftools/preprocessing/{archives → archived}/preprocessing.py +8 -6
- smftools/preprocessing/binarize.py +21 -4
- smftools/preprocessing/binarize_on_Youden.py +129 -31
- smftools/preprocessing/binary_layers_to_ohe.py +17 -11
- smftools/preprocessing/calculate_complexity_II.py +86 -59
- smftools/preprocessing/calculate_consensus.py +28 -19
- smftools/preprocessing/calculate_coverage.py +50 -25
- smftools/preprocessing/calculate_pairwise_differences.py +2 -1
- smftools/preprocessing/calculate_pairwise_hamming_distances.py +4 -3
- smftools/preprocessing/calculate_position_Youden.py +118 -54
- smftools/preprocessing/calculate_read_length_stats.py +52 -23
- smftools/preprocessing/calculate_read_modification_stats.py +91 -57
- smftools/preprocessing/clean_NaN.py +38 -28
- smftools/preprocessing/filter_adata_by_nan_proportion.py +24 -12
- smftools/preprocessing/filter_reads_on_length_quality_mapping.py +71 -38
- smftools/preprocessing/filter_reads_on_modification_thresholds.py +181 -73
- smftools/preprocessing/flag_duplicate_reads.py +689 -272
- smftools/preprocessing/invert_adata.py +26 -11
- smftools/preprocessing/load_sample_sheet.py +40 -22
- smftools/preprocessing/make_dirs.py +8 -3
- smftools/preprocessing/min_non_diagonal.py +2 -1
- smftools/preprocessing/recipes.py +56 -23
- smftools/preprocessing/reindex_references_adata.py +103 -0
- smftools/preprocessing/subsample_adata.py +33 -16
- smftools/readwrite.py +331 -82
- smftools/schema/__init__.py +11 -0
- smftools/schema/anndata_schema_v1.yaml +227 -0
- smftools/tools/__init__.py +3 -4
- smftools/tools/archived/classifiers.py +163 -0
- smftools/tools/archived/subset_adata_v1.py +10 -1
- smftools/tools/archived/subset_adata_v2.py +12 -1
- smftools/tools/calculate_umap.py +54 -15
- smftools/tools/cluster_adata_on_methylation.py +115 -46
- smftools/tools/general_tools.py +70 -25
- smftools/tools/position_stats.py +229 -98
- smftools/tools/read_stats.py +50 -29
- smftools/tools/spatial_autocorrelation.py +365 -192
- smftools/tools/subset_adata.py +23 -21
- {smftools-0.2.3.dist-info → smftools-0.2.5.dist-info}/METADATA +17 -39
- smftools-0.2.5.dist-info/RECORD +181 -0
- smftools-0.2.3.dist-info/RECORD +0 -173
- /smftools/cli/{cli_flows.py → archived/cli_flows.py} +0 -0
- /smftools/hmm/{apply_hmm_batched.py → archived/apply_hmm_batched.py} +0 -0
- /smftools/hmm/{calculate_distances.py → archived/calculate_distances.py} +0 -0
- /smftools/hmm/{train_hmm.py → archived/train_hmm.py} +0 -0
- /smftools/preprocessing/{add_read_length_and_mapping_qc.py → archived/add_read_length_and_mapping_qc.py} +0 -0
- /smftools/preprocessing/{archives → archived}/mark_duplicates.py +0 -0
- /smftools/preprocessing/{archives → archived}/remove_duplicates.py +0 -0
- {smftools-0.2.3.dist-info → smftools-0.2.5.dist-info}/WHEEL +0 -0
- {smftools-0.2.3.dist-info → smftools-0.2.5.dist-info}/entry_points.txt +0 -0
- {smftools-0.2.3.dist-info → smftools-0.2.5.dist-info}/licenses/LICENSE +0 -0
smftools/constants.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from types import MappingProxyType
|
|
4
|
+
from typing import Any, Dict, Final, Mapping
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
## Helpers ##
|
|
8
|
+
def _deep_freeze(obj: Any) -> Any:
|
|
9
|
+
"""Recursively freeze common containers. Use for constant exports."""
|
|
10
|
+
if isinstance(obj, dict):
|
|
11
|
+
return MappingProxyType({k: _deep_freeze(v) for k, v in obj.items()})
|
|
12
|
+
if isinstance(obj, (list, tuple)):
|
|
13
|
+
return tuple(_deep_freeze(v) for v in obj)
|
|
14
|
+
if isinstance(obj, set):
|
|
15
|
+
return frozenset(_deep_freeze(v) for v in obj)
|
|
16
|
+
return obj # ints/strs/tuples (already immutable)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
## Constants ##
|
|
20
|
+
# Scalar constants.
BAM_SUFFIX: Final[str] = ".bam"
BARCODE_BOTH_ENDS: Final[bool] = False
REF_COL: Final[str] = "Reference_strand"
SAMPLE_COL: Final[str] = "Experiment_name_and_barcode"
SPLIT_DIR: Final[str] = "demultiplexed_BAMs"
TRIM: Final[bool] = False

# Container constants are exported through _deep_freeze so importers receive
# read-only objects; the _private_* names hold the raw literals.
_private_conversions = ["unconverted"]
# _deep_freeze turns a list into a tuple, so the export is annotated as one.
CONVERSIONS: Final[tuple[str, ...]] = _deep_freeze(_private_conversions)

_private_mod_list = ("5mC_5hmC", "6mA")
MOD_LIST: Final[tuple[str, ...]] = _deep_freeze(_private_mod_list)

_private_mod_map: Dict[str, str] = {"6mA": "6mA", "5mC_5hmC": "5mC"}
# Exported as a read-only mapping proxy over a copy of the dict above.
MOD_MAP: Final[Mapping[str, str]] = _deep_freeze(_private_mod_map)

_private_strands = ("bottom", "top")
STRANDS: Final[tuple[str, ...]] = _deep_freeze(_private_strands)
|
smftools/datasets/__init__.py
CHANGED
|
@@ -1,9 +1,3 @@
|
|
|
1
|
-
from .datasets import
|
|
2
|
-
dCas9_kinetics,
|
|
3
|
-
Kissiov_and_McKenna_2025
|
|
4
|
-
)
|
|
1
|
+
from .datasets import Kissiov_and_McKenna_2025, dCas9_kinetics
|
|
5
2
|
|
|
6
|
-
__all__ = [
|
|
7
|
-
"dCas9_kinetics",
|
|
8
|
-
"Kissiov_and_McKenna_2025"
|
|
9
|
-
]
|
|
3
|
+
__all__ = ["dCas9_kinetics", "Kissiov_and_McKenna_2025"]
|
smftools/datasets/datasets.py
CHANGED
|
@@ -1,28 +1,42 @@
|
|
|
1
|
-
|
|
1
|
+
"""Dataset helpers for bundled SMF datasets."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import TYPE_CHECKING
|
|
7
|
+
|
|
8
|
+
if TYPE_CHECKING:
|
|
9
|
+
import anndata as ad
|
|
2
10
|
|
|
3
|
-
def import_HERE():
|
|
4
|
-
"""
|
|
5
|
-
Imports HERE for loading datasets
|
|
6
|
-
"""
|
|
7
|
-
from pathlib import Path
|
|
8
|
-
from .._settings import settings
|
|
9
|
-
HERE = Path(__file__).parent
|
|
10
|
-
return HERE
|
|
11
11
|
|
|
12
|
-
def
|
|
12
|
+
def import_HERE() -> Path:
|
|
13
|
+
"""Resolve the local dataset directory.
|
|
14
|
+
|
|
15
|
+
Returns:
|
|
16
|
+
Path: Path to the datasets directory.
|
|
13
17
|
"""
|
|
14
|
-
|
|
18
|
+
return Path(__file__).parent
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def dCas9_kinetics() -> "ad.AnnData":
|
|
22
|
+
"""Load the in vitro Hia5 dCas9 kinetics SMF dataset.
|
|
23
|
+
|
|
24
|
+
Returns:
|
|
25
|
+
anndata.AnnData: Annotated dataset with Nanopore HAC m6A modcalls.
|
|
15
26
|
"""
|
|
16
27
|
import anndata as ad
|
|
17
|
-
|
|
18
|
-
filepath =
|
|
28
|
+
|
|
29
|
+
filepath = import_HERE() / "dCas9_m6A_invitro_kinetics.h5ad.gz"
|
|
19
30
|
return ad.read_h5ad(filepath)
|
|
20
31
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
F1 Hybrid M.CviPI natural killer cell SMF
|
|
32
|
+
|
|
33
|
+
def Kissiov_and_McKenna_2025() -> "ad.AnnData":
|
|
34
|
+
"""Load the F1 Hybrid M.CviPI natural killer cell SMF dataset.
|
|
35
|
+
|
|
36
|
+
Returns:
|
|
37
|
+
anndata.AnnData: Annotated dataset with canonical calls of NEB EMseq converted SMF gDNA.
|
|
24
38
|
"""
|
|
25
39
|
import anndata as ad
|
|
26
|
-
|
|
27
|
-
filepath =
|
|
40
|
+
|
|
41
|
+
filepath = import_HERE() / "F1_hybrid_NKG2A_enhander_promoter_GpC_conversion_SMF.h5ad.gz"
|
|
28
42
|
return ad.read_h5ad(filepath)
|