smftools 0.2.5__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- smftools/__init__.py +39 -7
- smftools/_settings.py +2 -0
- smftools/_version.py +3 -1
- smftools/cli/__init__.py +1 -0
- smftools/cli/archived/cli_flows.py +2 -0
- smftools/cli/helpers.py +2 -0
- smftools/cli/hmm_adata.py +7 -2
- smftools/cli/load_adata.py +130 -98
- smftools/cli/preprocess_adata.py +2 -0
- smftools/cli/spatial_adata.py +5 -1
- smftools/cli_entry.py +26 -1
- smftools/config/__init__.py +2 -0
- smftools/config/default.yaml +4 -1
- smftools/config/experiment_config.py +6 -0
- smftools/datasets/__init__.py +2 -0
- smftools/hmm/HMM.py +9 -3
- smftools/hmm/__init__.py +24 -13
- smftools/hmm/archived/apply_hmm_batched.py +2 -0
- smftools/hmm/archived/calculate_distances.py +2 -0
- smftools/hmm/archived/call_hmm_peaks.py +2 -0
- smftools/hmm/archived/train_hmm.py +2 -0
- smftools/hmm/call_hmm_peaks.py +5 -2
- smftools/hmm/display_hmm.py +4 -1
- smftools/hmm/hmm_readwrite.py +7 -2
- smftools/hmm/nucleosome_hmm_refinement.py +2 -0
- smftools/informatics/__init__.py +53 -34
- smftools/informatics/archived/bam_conversion.py +2 -0
- smftools/informatics/archived/bam_direct.py +2 -0
- smftools/informatics/archived/basecall_pod5s.py +2 -0
- smftools/informatics/archived/basecalls_to_adata.py +2 -0
- smftools/informatics/archived/conversion_smf.py +2 -0
- smftools/informatics/archived/deaminase_smf.py +1 -0
- smftools/informatics/archived/direct_smf.py +2 -0
- smftools/informatics/archived/fast5_to_pod5.py +2 -0
- smftools/informatics/archived/helpers/archived/__init__.py +2 -0
- smftools/informatics/archived/helpers/archived/align_and_sort_BAM.py +2 -0
- smftools/informatics/archived/helpers/archived/aligned_BAM_to_bed.py +2 -0
- smftools/informatics/archived/helpers/archived/bed_to_bigwig.py +2 -0
- smftools/informatics/archived/helpers/archived/canoncall.py +2 -0
- smftools/informatics/archived/helpers/archived/converted_BAM_to_adata.py +2 -0
- smftools/informatics/archived/helpers/archived/count_aligned_reads.py +2 -0
- smftools/informatics/archived/helpers/archived/demux_and_index_BAM.py +2 -0
- smftools/informatics/archived/helpers/archived/extract_base_identities.py +2 -0
- smftools/informatics/archived/helpers/archived/extract_mods.py +2 -0
- smftools/informatics/archived/helpers/archived/extract_read_features_from_bam.py +2 -0
- smftools/informatics/archived/helpers/archived/extract_read_lengths_from_bed.py +2 -0
- smftools/informatics/archived/helpers/archived/extract_readnames_from_BAM.py +2 -0
- smftools/informatics/archived/helpers/archived/find_conversion_sites.py +2 -0
- smftools/informatics/archived/helpers/archived/generate_converted_FASTA.py +2 -0
- smftools/informatics/archived/helpers/archived/get_chromosome_lengths.py +2 -0
- smftools/informatics/archived/helpers/archived/get_native_references.py +2 -0
- smftools/informatics/archived/helpers/archived/index_fasta.py +2 -0
- smftools/informatics/archived/helpers/archived/informatics.py +2 -0
- smftools/informatics/archived/helpers/archived/load_adata.py +2 -0
- smftools/informatics/archived/helpers/archived/make_modbed.py +2 -0
- smftools/informatics/archived/helpers/archived/modQC.py +2 -0
- smftools/informatics/archived/helpers/archived/modcall.py +2 -0
- smftools/informatics/archived/helpers/archived/ohe_batching.py +2 -0
- smftools/informatics/archived/helpers/archived/ohe_layers_decode.py +2 -0
- smftools/informatics/archived/helpers/archived/one_hot_decode.py +2 -0
- smftools/informatics/archived/helpers/archived/one_hot_encode.py +2 -0
- smftools/informatics/archived/helpers/archived/plot_bed_histograms.py +2 -0
- smftools/informatics/archived/helpers/archived/separate_bam_by_bc.py +2 -0
- smftools/informatics/archived/helpers/archived/split_and_index_BAM.py +2 -0
- smftools/informatics/archived/print_bam_query_seq.py +2 -0
- smftools/informatics/archived/subsample_fasta_from_bed.py +2 -0
- smftools/informatics/archived/subsample_pod5.py +2 -0
- smftools/informatics/bam_functions.py +737 -170
- smftools/informatics/basecalling.py +2 -0
- smftools/informatics/bed_functions.py +271 -61
- smftools/informatics/binarize_converted_base_identities.py +3 -0
- smftools/informatics/complement_base_list.py +2 -0
- smftools/informatics/converted_BAM_to_adata.py +66 -22
- smftools/informatics/fasta_functions.py +94 -10
- smftools/informatics/h5ad_functions.py +8 -2
- smftools/informatics/modkit_extract_to_adata.py +16 -6
- smftools/informatics/modkit_functions.py +2 -0
- smftools/informatics/ohe.py +2 -0
- smftools/informatics/pod5_functions.py +3 -2
- smftools/machine_learning/__init__.py +22 -6
- smftools/machine_learning/data/__init__.py +2 -0
- smftools/machine_learning/data/anndata_data_module.py +18 -4
- smftools/machine_learning/data/preprocessing.py +2 -0
- smftools/machine_learning/evaluation/__init__.py +2 -0
- smftools/machine_learning/evaluation/eval_utils.py +2 -0
- smftools/machine_learning/evaluation/evaluators.py +14 -9
- smftools/machine_learning/inference/__init__.py +2 -0
- smftools/machine_learning/inference/inference_utils.py +2 -0
- smftools/machine_learning/inference/lightning_inference.py +6 -1
- smftools/machine_learning/inference/sklearn_inference.py +2 -0
- smftools/machine_learning/inference/sliding_window_inference.py +2 -0
- smftools/machine_learning/models/__init__.py +2 -0
- smftools/machine_learning/models/base.py +7 -2
- smftools/machine_learning/models/cnn.py +7 -2
- smftools/machine_learning/models/lightning_base.py +16 -11
- smftools/machine_learning/models/mlp.py +5 -1
- smftools/machine_learning/models/positional.py +7 -2
- smftools/machine_learning/models/rnn.py +5 -1
- smftools/machine_learning/models/sklearn_models.py +14 -9
- smftools/machine_learning/models/transformer.py +7 -2
- smftools/machine_learning/models/wrappers.py +6 -2
- smftools/machine_learning/training/__init__.py +2 -0
- smftools/machine_learning/training/train_lightning_model.py +13 -3
- smftools/machine_learning/training/train_sklearn_model.py +2 -0
- smftools/machine_learning/utils/__init__.py +2 -0
- smftools/machine_learning/utils/device.py +5 -1
- smftools/machine_learning/utils/grl.py +5 -1
- smftools/optional_imports.py +31 -0
- smftools/plotting/__init__.py +32 -31
- smftools/plotting/autocorrelation_plotting.py +9 -5
- smftools/plotting/classifiers.py +16 -4
- smftools/plotting/general_plotting.py +6 -3
- smftools/plotting/hmm_plotting.py +12 -2
- smftools/plotting/position_stats.py +15 -7
- smftools/plotting/qc_plotting.py +6 -1
- smftools/preprocessing/__init__.py +35 -37
- smftools/preprocessing/archived/add_read_length_and_mapping_qc.py +2 -0
- smftools/preprocessing/archived/calculate_complexity.py +2 -0
- smftools/preprocessing/archived/mark_duplicates.py +2 -0
- smftools/preprocessing/archived/preprocessing.py +2 -0
- smftools/preprocessing/archived/remove_duplicates.py +2 -0
- smftools/preprocessing/binary_layers_to_ohe.py +2 -1
- smftools/preprocessing/calculate_complexity_II.py +4 -1
- smftools/preprocessing/calculate_pairwise_differences.py +2 -0
- smftools/preprocessing/calculate_pairwise_hamming_distances.py +3 -0
- smftools/preprocessing/calculate_position_Youden.py +9 -2
- smftools/preprocessing/filter_reads_on_length_quality_mapping.py +2 -0
- smftools/preprocessing/filter_reads_on_modification_thresholds.py +2 -0
- smftools/preprocessing/flag_duplicate_reads.py +42 -54
- smftools/preprocessing/make_dirs.py +2 -1
- smftools/preprocessing/min_non_diagonal.py +2 -0
- smftools/preprocessing/recipes.py +2 -0
- smftools/tools/__init__.py +26 -18
- smftools/tools/archived/apply_hmm.py +2 -0
- smftools/tools/archived/classifiers.py +2 -0
- smftools/tools/archived/classify_methylated_features.py +2 -0
- smftools/tools/archived/classify_non_methylated_features.py +2 -0
- smftools/tools/archived/subset_adata_v1.py +2 -0
- smftools/tools/archived/subset_adata_v2.py +2 -0
- smftools/tools/calculate_umap.py +3 -1
- smftools/tools/cluster_adata_on_methylation.py +7 -1
- smftools/tools/position_stats.py +17 -27
- {smftools-0.2.5.dist-info → smftools-0.3.0.dist-info}/METADATA +67 -33
- smftools-0.3.0.dist-info/RECORD +182 -0
- smftools-0.2.5.dist-info/RECORD +0 -181
- {smftools-0.2.5.dist-info → smftools-0.3.0.dist-info}/WHEEL +0 -0
- {smftools-0.2.5.dist-info → smftools-0.3.0.dist-info}/entry_points.txt +0 -0
- {smftools-0.2.5.dist-info → smftools-0.3.0.dist-info}/licenses/LICENSE +0 -0
smftools/hmm/display_hmm.py
CHANGED
|
@@ -1,4 +1,7 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
from smftools.logging_utils import get_logger
|
|
4
|
+
from smftools.optional_imports import require
|
|
2
5
|
|
|
3
6
|
logger = get_logger(__name__)
|
|
4
7
|
|
|
@@ -11,7 +14,7 @@ def display_hmm(hmm, state_labels=["Non-Methylated", "Methylated"], obs_labels=[
|
|
|
11
14
|
state_labels: Optional labels for states.
|
|
12
15
|
obs_labels: Optional labels for observations.
|
|
13
16
|
"""
|
|
14
|
-
|
|
17
|
+
torch = require("torch", extra="torch", purpose="HMM display")
|
|
15
18
|
|
|
16
19
|
logger.info("**HMM Model Overview**")
|
|
17
20
|
logger.info("%s", hmm)
|
smftools/hmm/hmm_readwrite.py
CHANGED
|
@@ -1,3 +1,8 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from smftools.optional_imports import require
|
|
4
|
+
|
|
5
|
+
|
|
1
6
|
def load_hmm(model_path, device="cpu"):
|
|
2
7
|
"""
|
|
3
8
|
Reads in a pretrained HMM.
|
|
@@ -5,7 +10,7 @@ def load_hmm(model_path, device="cpu"):
|
|
|
5
10
|
Parameters:
|
|
6
11
|
model_path (str): Path to a pretrained HMM
|
|
7
12
|
"""
|
|
8
|
-
|
|
13
|
+
torch = require("torch", extra="torch", purpose="HMM read/write")
|
|
9
14
|
|
|
10
15
|
# Load model using PyTorch
|
|
11
16
|
hmm = torch.load(model_path)
|
|
@@ -20,6 +25,6 @@ def save_hmm(model, model_path):
|
|
|
20
25
|
model: HMM model instance.
|
|
21
26
|
model_path: Output path for the model.
|
|
22
27
|
"""
|
|
23
|
-
|
|
28
|
+
torch = require("torch", extra="torch", purpose="HMM read/write")
|
|
24
29
|
|
|
25
30
|
torch.save(model, model_path)
|
smftools/informatics/__init__.py
CHANGED
|
@@ -1,37 +1,56 @@
|
|
|
1
|
-
from
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from importlib import import_module
|
|
4
|
+
|
|
5
|
+
_LAZY_ATTRS = {
|
|
6
|
+
"_bed_to_bigwig": "smftools.informatics.bed_functions",
|
|
7
|
+
"_plot_bed_histograms": "smftools.informatics.bed_functions",
|
|
8
|
+
"add_demux_type_annotation": "smftools.informatics.h5ad_functions",
|
|
9
|
+
"add_read_length_and_mapping_qc": "smftools.informatics.h5ad_functions",
|
|
10
|
+
"align_and_sort_BAM": "smftools.informatics.bam_functions",
|
|
11
|
+
"bam_qc": "smftools.informatics.bam_functions",
|
|
12
|
+
"basecall_pod5s": "smftools.informatics.pod5_functions",
|
|
13
|
+
"canoncall": "smftools.informatics.basecalling",
|
|
14
|
+
"concatenate_fastqs_to_bam": "smftools.informatics.bam_functions",
|
|
15
|
+
"converted_BAM_to_adata": "smftools.informatics.converted_BAM_to_adata",
|
|
16
|
+
"count_aligned_reads": "smftools.informatics.bam_functions",
|
|
17
|
+
"demux_and_index_BAM": "smftools.informatics.bam_functions",
|
|
18
|
+
"extract_base_identities": "smftools.informatics.bam_functions",
|
|
19
|
+
"extract_mods": "smftools.informatics.modkit_functions",
|
|
20
|
+
"extract_read_features_from_bam": "smftools.informatics.bam_functions",
|
|
21
|
+
"extract_read_lengths_from_bed": "smftools.informatics.bed_functions",
|
|
22
|
+
"extract_readnames_from_bam": "smftools.informatics.bam_functions",
|
|
23
|
+
"fast5_to_pod5": "smftools.informatics.pod5_functions",
|
|
24
|
+
"find_conversion_sites": "smftools.informatics.fasta_functions",
|
|
25
|
+
"generate_converted_FASTA": "smftools.informatics.fasta_functions",
|
|
26
|
+
"get_chromosome_lengths": "smftools.informatics.fasta_functions",
|
|
27
|
+
"get_native_references": "smftools.informatics.fasta_functions",
|
|
28
|
+
"index_fasta": "smftools.informatics.fasta_functions",
|
|
29
|
+
"make_modbed": "smftools.informatics.modkit_functions",
|
|
30
|
+
"modQC": "smftools.informatics.modkit_functions",
|
|
31
|
+
"modcall": "smftools.informatics.basecalling",
|
|
32
|
+
"modkit_extract_to_adata": "smftools.informatics.modkit_extract_to_adata",
|
|
33
|
+
"ohe_batching": "smftools.informatics.ohe",
|
|
34
|
+
"ohe_layers_decode": "smftools.informatics.ohe",
|
|
35
|
+
"one_hot_decode": "smftools.informatics.ohe",
|
|
36
|
+
"one_hot_encode": "smftools.informatics.ohe",
|
|
37
|
+
"run_multiqc": "smftools.informatics.run_multiqc",
|
|
38
|
+
"separate_bam_by_bc": "smftools.informatics.bam_functions",
|
|
39
|
+
"split_and_index_BAM": "smftools.informatics.bam_functions",
|
|
40
|
+
"subsample_fasta_from_bed": "smftools.informatics.fasta_functions",
|
|
41
|
+
"subsample_pod5": "smftools.informatics.pod5_functions",
|
|
42
|
+
"aligned_BAM_to_bed": "smftools.informatics.bed_functions",
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def __getattr__(name: str):
|
|
47
|
+
if name in _LAZY_ATTRS:
|
|
48
|
+
module = import_module(_LAZY_ATTRS[name])
|
|
49
|
+
attr = getattr(module, name)
|
|
50
|
+
globals()[name] = attr
|
|
51
|
+
return attr
|
|
52
|
+
raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
|
|
53
|
+
|
|
35
54
|
|
|
36
55
|
__all__ = [
|
|
37
56
|
"basecall_pod5s",
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
## conversion_smf
|
|
2
4
|
|
|
3
5
|
def conversion_smf(fasta, output_directory, conversion_types, strands, model_dir, model, input_data_path, split_dir, barcode_kit, mapping_threshold, experiment_name, bam_suffix, basecall, barcode_both_ends, trim, device, make_bigwigs, threads, input_already_demuxed):
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
1
2
|
|
|
2
3
|
def deaminase_smf(fasta, output_directory, conversion_types, strands, model_dir, model, input_data_path, split_dir, barcode_kit, mapping_threshold, experiment_name, bam_suffix, basecall, barcode_both_ends, trim, device, make_bigwigs, threads, input_already_demuxed):
|
|
3
4
|
"""
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
## direct_smf
|
|
2
4
|
|
|
3
5
|
def direct_smf(fasta, output_directory, mod_list, model_dir, model, thresholds, input_data_path, split_dir, barcode_kit, mapping_threshold, experiment_name, bam_suffix, batch_size, basecall, barcode_both_ends, trim, device, make_bigwigs, skip_unclassified, delete_batch_hdfs, threads):
|