smftools 0.2.4__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- smftools/__init__.py +43 -13
- smftools/_settings.py +6 -6
- smftools/_version.py +3 -1
- smftools/cli/__init__.py +1 -0
- smftools/cli/archived/cli_flows.py +2 -0
- smftools/cli/helpers.py +9 -1
- smftools/cli/hmm_adata.py +905 -242
- smftools/cli/load_adata.py +432 -280
- smftools/cli/preprocess_adata.py +287 -171
- smftools/cli/spatial_adata.py +141 -53
- smftools/cli_entry.py +119 -178
- smftools/config/__init__.py +3 -1
- smftools/config/conversion.yaml +5 -1
- smftools/config/deaminase.yaml +1 -1
- smftools/config/default.yaml +26 -18
- smftools/config/direct.yaml +8 -3
- smftools/config/discover_input_files.py +19 -5
- smftools/config/experiment_config.py +511 -276
- smftools/constants.py +37 -0
- smftools/datasets/__init__.py +4 -8
- smftools/datasets/datasets.py +32 -18
- smftools/hmm/HMM.py +2133 -1428
- smftools/hmm/__init__.py +24 -14
- smftools/hmm/archived/apply_hmm_batched.py +2 -0
- smftools/hmm/archived/calculate_distances.py +2 -0
- smftools/hmm/archived/call_hmm_peaks.py +18 -1
- smftools/hmm/archived/train_hmm.py +2 -0
- smftools/hmm/call_hmm_peaks.py +176 -193
- smftools/hmm/display_hmm.py +23 -7
- smftools/hmm/hmm_readwrite.py +20 -6
- smftools/hmm/nucleosome_hmm_refinement.py +104 -14
- smftools/informatics/__init__.py +55 -13
- smftools/informatics/archived/bam_conversion.py +2 -0
- smftools/informatics/archived/bam_direct.py +2 -0
- smftools/informatics/archived/basecall_pod5s.py +2 -0
- smftools/informatics/archived/basecalls_to_adata.py +2 -0
- smftools/informatics/archived/conversion_smf.py +2 -0
- smftools/informatics/archived/deaminase_smf.py +1 -0
- smftools/informatics/archived/direct_smf.py +2 -0
- smftools/informatics/archived/fast5_to_pod5.py +2 -0
- smftools/informatics/archived/helpers/archived/__init__.py +2 -0
- smftools/informatics/archived/helpers/archived/align_and_sort_BAM.py +16 -1
- smftools/informatics/archived/helpers/archived/aligned_BAM_to_bed.py +2 -0
- smftools/informatics/archived/helpers/archived/bam_qc.py +14 -1
- smftools/informatics/archived/helpers/archived/bed_to_bigwig.py +2 -0
- smftools/informatics/archived/helpers/archived/canoncall.py +2 -0
- smftools/informatics/archived/helpers/archived/concatenate_fastqs_to_bam.py +8 -1
- smftools/informatics/archived/helpers/archived/converted_BAM_to_adata.py +2 -0
- smftools/informatics/archived/helpers/archived/count_aligned_reads.py +2 -0
- smftools/informatics/archived/helpers/archived/demux_and_index_BAM.py +2 -0
- smftools/informatics/archived/helpers/archived/extract_base_identities.py +2 -0
- smftools/informatics/archived/helpers/archived/extract_mods.py +2 -0
- smftools/informatics/archived/helpers/archived/extract_read_features_from_bam.py +2 -0
- smftools/informatics/archived/helpers/archived/extract_read_lengths_from_bed.py +2 -0
- smftools/informatics/archived/helpers/archived/extract_readnames_from_BAM.py +2 -0
- smftools/informatics/archived/helpers/archived/find_conversion_sites.py +2 -0
- smftools/informatics/archived/helpers/archived/generate_converted_FASTA.py +2 -0
- smftools/informatics/archived/helpers/archived/get_chromosome_lengths.py +2 -0
- smftools/informatics/archived/helpers/archived/get_native_references.py +2 -0
- smftools/informatics/archived/helpers/archived/index_fasta.py +2 -0
- smftools/informatics/archived/helpers/archived/informatics.py +2 -0
- smftools/informatics/archived/helpers/archived/load_adata.py +5 -3
- smftools/informatics/archived/helpers/archived/make_modbed.py +2 -0
- smftools/informatics/archived/helpers/archived/modQC.py +2 -0
- smftools/informatics/archived/helpers/archived/modcall.py +2 -0
- smftools/informatics/archived/helpers/archived/ohe_batching.py +2 -0
- smftools/informatics/archived/helpers/archived/ohe_layers_decode.py +2 -0
- smftools/informatics/archived/helpers/archived/one_hot_decode.py +2 -0
- smftools/informatics/archived/helpers/archived/one_hot_encode.py +2 -0
- smftools/informatics/archived/helpers/archived/plot_bed_histograms.py +5 -1
- smftools/informatics/archived/helpers/archived/separate_bam_by_bc.py +2 -0
- smftools/informatics/archived/helpers/archived/split_and_index_BAM.py +2 -0
- smftools/informatics/archived/print_bam_query_seq.py +9 -1
- smftools/informatics/archived/subsample_fasta_from_bed.py +2 -0
- smftools/informatics/archived/subsample_pod5.py +2 -0
- smftools/informatics/bam_functions.py +1059 -269
- smftools/informatics/basecalling.py +53 -9
- smftools/informatics/bed_functions.py +357 -114
- smftools/informatics/binarize_converted_base_identities.py +21 -7
- smftools/informatics/complement_base_list.py +9 -6
- smftools/informatics/converted_BAM_to_adata.py +324 -137
- smftools/informatics/fasta_functions.py +251 -89
- smftools/informatics/h5ad_functions.py +202 -30
- smftools/informatics/modkit_extract_to_adata.py +623 -274
- smftools/informatics/modkit_functions.py +87 -44
- smftools/informatics/ohe.py +46 -21
- smftools/informatics/pod5_functions.py +114 -74
- smftools/informatics/run_multiqc.py +20 -14
- smftools/logging_utils.py +51 -0
- smftools/machine_learning/__init__.py +23 -12
- smftools/machine_learning/data/__init__.py +2 -0
- smftools/machine_learning/data/anndata_data_module.py +157 -50
- smftools/machine_learning/data/preprocessing.py +4 -1
- smftools/machine_learning/evaluation/__init__.py +3 -1
- smftools/machine_learning/evaluation/eval_utils.py +13 -14
- smftools/machine_learning/evaluation/evaluators.py +52 -34
- smftools/machine_learning/inference/__init__.py +3 -1
- smftools/machine_learning/inference/inference_utils.py +9 -4
- smftools/machine_learning/inference/lightning_inference.py +14 -13
- smftools/machine_learning/inference/sklearn_inference.py +8 -8
- smftools/machine_learning/inference/sliding_window_inference.py +37 -25
- smftools/machine_learning/models/__init__.py +12 -5
- smftools/machine_learning/models/base.py +34 -43
- smftools/machine_learning/models/cnn.py +22 -13
- smftools/machine_learning/models/lightning_base.py +78 -42
- smftools/machine_learning/models/mlp.py +18 -5
- smftools/machine_learning/models/positional.py +10 -4
- smftools/machine_learning/models/rnn.py +8 -3
- smftools/machine_learning/models/sklearn_models.py +46 -24
- smftools/machine_learning/models/transformer.py +75 -55
- smftools/machine_learning/models/wrappers.py +8 -3
- smftools/machine_learning/training/__init__.py +4 -2
- smftools/machine_learning/training/train_lightning_model.py +42 -23
- smftools/machine_learning/training/train_sklearn_model.py +11 -15
- smftools/machine_learning/utils/__init__.py +3 -1
- smftools/machine_learning/utils/device.py +12 -5
- smftools/machine_learning/utils/grl.py +8 -2
- smftools/metadata.py +443 -0
- smftools/optional_imports.py +31 -0
- smftools/plotting/__init__.py +32 -17
- smftools/plotting/autocorrelation_plotting.py +153 -48
- smftools/plotting/classifiers.py +175 -73
- smftools/plotting/general_plotting.py +350 -168
- smftools/plotting/hmm_plotting.py +53 -14
- smftools/plotting/position_stats.py +155 -87
- smftools/plotting/qc_plotting.py +25 -12
- smftools/preprocessing/__init__.py +35 -37
- smftools/preprocessing/append_base_context.py +105 -79
- smftools/preprocessing/append_binary_layer_by_base_context.py +75 -37
- smftools/preprocessing/{archives → archived}/add_read_length_and_mapping_qc.py +2 -0
- smftools/preprocessing/{archives → archived}/calculate_complexity.py +5 -1
- smftools/preprocessing/{archives → archived}/mark_duplicates.py +2 -0
- smftools/preprocessing/{archives → archived}/preprocessing.py +10 -6
- smftools/preprocessing/{archives → archived}/remove_duplicates.py +2 -0
- smftools/preprocessing/binarize.py +21 -4
- smftools/preprocessing/binarize_on_Youden.py +127 -31
- smftools/preprocessing/binary_layers_to_ohe.py +18 -11
- smftools/preprocessing/calculate_complexity_II.py +89 -59
- smftools/preprocessing/calculate_consensus.py +28 -19
- smftools/preprocessing/calculate_coverage.py +44 -22
- smftools/preprocessing/calculate_pairwise_differences.py +4 -1
- smftools/preprocessing/calculate_pairwise_hamming_distances.py +7 -3
- smftools/preprocessing/calculate_position_Youden.py +110 -55
- smftools/preprocessing/calculate_read_length_stats.py +52 -23
- smftools/preprocessing/calculate_read_modification_stats.py +91 -57
- smftools/preprocessing/clean_NaN.py +38 -28
- smftools/preprocessing/filter_adata_by_nan_proportion.py +24 -12
- smftools/preprocessing/filter_reads_on_length_quality_mapping.py +72 -37
- smftools/preprocessing/filter_reads_on_modification_thresholds.py +183 -73
- smftools/preprocessing/flag_duplicate_reads.py +708 -303
- smftools/preprocessing/invert_adata.py +26 -11
- smftools/preprocessing/load_sample_sheet.py +40 -22
- smftools/preprocessing/make_dirs.py +9 -3
- smftools/preprocessing/min_non_diagonal.py +4 -1
- smftools/preprocessing/recipes.py +58 -23
- smftools/preprocessing/reindex_references_adata.py +93 -27
- smftools/preprocessing/subsample_adata.py +33 -16
- smftools/readwrite.py +264 -109
- smftools/schema/__init__.py +11 -0
- smftools/schema/anndata_schema_v1.yaml +227 -0
- smftools/tools/__init__.py +25 -18
- smftools/tools/archived/apply_hmm.py +2 -0
- smftools/tools/archived/classifiers.py +165 -0
- smftools/tools/archived/classify_methylated_features.py +2 -0
- smftools/tools/archived/classify_non_methylated_features.py +2 -0
- smftools/tools/archived/subset_adata_v1.py +12 -1
- smftools/tools/archived/subset_adata_v2.py +14 -1
- smftools/tools/calculate_umap.py +56 -15
- smftools/tools/cluster_adata_on_methylation.py +122 -47
- smftools/tools/general_tools.py +70 -25
- smftools/tools/position_stats.py +220 -99
- smftools/tools/read_stats.py +50 -29
- smftools/tools/spatial_autocorrelation.py +365 -192
- smftools/tools/subset_adata.py +23 -21
- smftools-0.3.0.dist-info/METADATA +147 -0
- smftools-0.3.0.dist-info/RECORD +182 -0
- smftools-0.2.4.dist-info/METADATA +0 -141
- smftools-0.2.4.dist-info/RECORD +0 -176
- {smftools-0.2.4.dist-info → smftools-0.3.0.dist-info}/WHEEL +0 -0
- {smftools-0.2.4.dist-info → smftools-0.3.0.dist-info}/entry_points.txt +0 -0
- {smftools-0.2.4.dist-info → smftools-0.3.0.dist-info}/licenses/LICENSE +0 -0
smftools/__init__.py
CHANGED
|
@@ -1,22 +1,52 @@
|
|
|
1
1
|
"""smftools"""
|
|
2
2
|
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
3
5
|
import logging
|
|
4
6
|
import warnings
|
|
5
|
-
|
|
6
|
-
from . import informatics as inform
|
|
7
|
-
from . import machine_learning as ml
|
|
8
|
-
from . import plotting as pl
|
|
9
|
-
from . import preprocessing as pp
|
|
10
|
-
from . import tools as tl
|
|
11
|
-
|
|
12
|
-
from . import cli, config, datasets, hmm
|
|
13
|
-
from .readwrite import adata_to_df, safe_write_h5ad, safe_read_h5ad, merge_barcoded_anndatas_core
|
|
14
|
-
|
|
7
|
+
from importlib import import_module
|
|
15
8
|
from importlib.metadata import version
|
|
9
|
+
from typing import TYPE_CHECKING
|
|
10
|
+
|
|
11
|
+
from .readwrite import adata_to_df, safe_read_h5ad, safe_write_h5ad
|
|
16
12
|
|
|
17
13
|
package_name = "smftools"
|
|
18
14
|
__version__ = version(package_name)
|
|
19
15
|
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
from smftools import (
|
|
18
|
+
cli,
|
|
19
|
+
config,
|
|
20
|
+
datasets,
|
|
21
|
+
hmm,
|
|
22
|
+
informatics,
|
|
23
|
+
machine_learning,
|
|
24
|
+
plotting,
|
|
25
|
+
preprocessing,
|
|
26
|
+
tools,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
_LAZY_MODULES = {
|
|
30
|
+
"cli": "smftools.cli",
|
|
31
|
+
"config": "smftools.config",
|
|
32
|
+
"datasets": "smftools.datasets",
|
|
33
|
+
"hmm": "smftools.hmm",
|
|
34
|
+
"inform": "smftools.informatics",
|
|
35
|
+
"ml": "smftools.machine_learning",
|
|
36
|
+
"pl": "smftools.plotting",
|
|
37
|
+
"pp": "smftools.preprocessing",
|
|
38
|
+
"tl": "smftools.tools",
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def __getattr__(name: str):
|
|
43
|
+
if name in _LAZY_MODULES:
|
|
44
|
+
module = import_module(_LAZY_MODULES[name])
|
|
45
|
+
globals()[name] = module
|
|
46
|
+
return module
|
|
47
|
+
raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
|
|
48
|
+
|
|
49
|
+
|
|
20
50
|
__all__ = [
|
|
21
51
|
"adata_to_df",
|
|
22
52
|
"inform",
|
|
@@ -24,7 +54,7 @@ __all__ = [
|
|
|
24
54
|
"pp",
|
|
25
55
|
"tl",
|
|
26
56
|
"pl",
|
|
27
|
-
"datasets"
|
|
57
|
+
"datasets",
|
|
28
58
|
"safe_write_h5ad",
|
|
29
|
-
"safe_read_h5ad"
|
|
30
|
-
]
|
|
59
|
+
"safe_read_h5ad",
|
|
60
|
+
]
|
smftools/_settings.py
CHANGED
|
@@ -1,20 +1,20 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
from pathlib import Path
|
|
2
4
|
from typing import Union
|
|
3
5
|
|
|
6
|
+
|
|
4
7
|
class SMFConfig:
|
|
5
8
|
"""\
|
|
6
9
|
Config for smftools.
|
|
7
10
|
"""
|
|
8
11
|
|
|
9
|
-
def __init__(
|
|
10
|
-
self,
|
|
11
|
-
*,
|
|
12
|
-
datasetdir: Union[Path, str] = "./datasets/"
|
|
13
|
-
):
|
|
12
|
+
def __init__(self, *, datasetdir: Union[Path, str] = "./datasets/"):
|
|
14
13
|
self._datasetdir = Path(datasetdir) if isinstance(datasetdir, str) else datasetdir
|
|
15
14
|
|
|
16
15
|
@property
|
|
17
16
|
def datasetdir(self) -> Path:
|
|
18
17
|
return self._datasetdir
|
|
19
18
|
|
|
20
|
-
|
|
19
|
+
|
|
20
|
+
settings = SMFConfig()
|
smftools/_version.py
CHANGED
smftools/cli/__init__.py
CHANGED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from __future__ import annotations
|
smftools/cli/helpers.py
CHANGED
|
@@ -1,8 +1,14 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
from dataclasses import dataclass
|
|
2
4
|
from pathlib import Path
|
|
5
|
+
|
|
3
6
|
import anndata as ad
|
|
7
|
+
|
|
8
|
+
from ..metadata import write_runtime_schema_yaml
|
|
4
9
|
from ..readwrite import safe_write_h5ad
|
|
5
10
|
|
|
11
|
+
|
|
6
12
|
@dataclass
|
|
7
13
|
class AdataPaths:
|
|
8
14
|
raw: Path
|
|
@@ -41,8 +47,10 @@ def get_adata_paths(cfg) -> AdataPaths:
|
|
|
41
47
|
hmm=hmm,
|
|
42
48
|
)
|
|
43
49
|
|
|
50
|
+
|
|
44
51
|
def write_gz_h5ad(adata: ad.AnnData, path: Path) -> Path:
|
|
45
52
|
if path.suffix != ".gz":
|
|
46
53
|
path = path.with_name(path.name + ".gz")
|
|
47
54
|
safe_write_h5ad(adata, path, compression="gzip", backup=True)
|
|
48
|
-
|
|
55
|
+
write_runtime_schema_yaml(adata, path, step_name="runtime")
|
|
56
|
+
return path
|