smftools 0.2.4__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181)
  1. smftools/__init__.py +43 -13
  2. smftools/_settings.py +6 -6
  3. smftools/_version.py +3 -1
  4. smftools/cli/__init__.py +1 -0
  5. smftools/cli/archived/cli_flows.py +2 -0
  6. smftools/cli/helpers.py +9 -1
  7. smftools/cli/hmm_adata.py +905 -242
  8. smftools/cli/load_adata.py +432 -280
  9. smftools/cli/preprocess_adata.py +287 -171
  10. smftools/cli/spatial_adata.py +141 -53
  11. smftools/cli_entry.py +119 -178
  12. smftools/config/__init__.py +3 -1
  13. smftools/config/conversion.yaml +5 -1
  14. smftools/config/deaminase.yaml +1 -1
  15. smftools/config/default.yaml +26 -18
  16. smftools/config/direct.yaml +8 -3
  17. smftools/config/discover_input_files.py +19 -5
  18. smftools/config/experiment_config.py +511 -276
  19. smftools/constants.py +37 -0
  20. smftools/datasets/__init__.py +4 -8
  21. smftools/datasets/datasets.py +32 -18
  22. smftools/hmm/HMM.py +2133 -1428
  23. smftools/hmm/__init__.py +24 -14
  24. smftools/hmm/archived/apply_hmm_batched.py +2 -0
  25. smftools/hmm/archived/calculate_distances.py +2 -0
  26. smftools/hmm/archived/call_hmm_peaks.py +18 -1
  27. smftools/hmm/archived/train_hmm.py +2 -0
  28. smftools/hmm/call_hmm_peaks.py +176 -193
  29. smftools/hmm/display_hmm.py +23 -7
  30. smftools/hmm/hmm_readwrite.py +20 -6
  31. smftools/hmm/nucleosome_hmm_refinement.py +104 -14
  32. smftools/informatics/__init__.py +55 -13
  33. smftools/informatics/archived/bam_conversion.py +2 -0
  34. smftools/informatics/archived/bam_direct.py +2 -0
  35. smftools/informatics/archived/basecall_pod5s.py +2 -0
  36. smftools/informatics/archived/basecalls_to_adata.py +2 -0
  37. smftools/informatics/archived/conversion_smf.py +2 -0
  38. smftools/informatics/archived/deaminase_smf.py +1 -0
  39. smftools/informatics/archived/direct_smf.py +2 -0
  40. smftools/informatics/archived/fast5_to_pod5.py +2 -0
  41. smftools/informatics/archived/helpers/archived/__init__.py +2 -0
  42. smftools/informatics/archived/helpers/archived/align_and_sort_BAM.py +16 -1
  43. smftools/informatics/archived/helpers/archived/aligned_BAM_to_bed.py +2 -0
  44. smftools/informatics/archived/helpers/archived/bam_qc.py +14 -1
  45. smftools/informatics/archived/helpers/archived/bed_to_bigwig.py +2 -0
  46. smftools/informatics/archived/helpers/archived/canoncall.py +2 -0
  47. smftools/informatics/archived/helpers/archived/concatenate_fastqs_to_bam.py +8 -1
  48. smftools/informatics/archived/helpers/archived/converted_BAM_to_adata.py +2 -0
  49. smftools/informatics/archived/helpers/archived/count_aligned_reads.py +2 -0
  50. smftools/informatics/archived/helpers/archived/demux_and_index_BAM.py +2 -0
  51. smftools/informatics/archived/helpers/archived/extract_base_identities.py +2 -0
  52. smftools/informatics/archived/helpers/archived/extract_mods.py +2 -0
  53. smftools/informatics/archived/helpers/archived/extract_read_features_from_bam.py +2 -0
  54. smftools/informatics/archived/helpers/archived/extract_read_lengths_from_bed.py +2 -0
  55. smftools/informatics/archived/helpers/archived/extract_readnames_from_BAM.py +2 -0
  56. smftools/informatics/archived/helpers/archived/find_conversion_sites.py +2 -0
  57. smftools/informatics/archived/helpers/archived/generate_converted_FASTA.py +2 -0
  58. smftools/informatics/archived/helpers/archived/get_chromosome_lengths.py +2 -0
  59. smftools/informatics/archived/helpers/archived/get_native_references.py +2 -0
  60. smftools/informatics/archived/helpers/archived/index_fasta.py +2 -0
  61. smftools/informatics/archived/helpers/archived/informatics.py +2 -0
  62. smftools/informatics/archived/helpers/archived/load_adata.py +5 -3
  63. smftools/informatics/archived/helpers/archived/make_modbed.py +2 -0
  64. smftools/informatics/archived/helpers/archived/modQC.py +2 -0
  65. smftools/informatics/archived/helpers/archived/modcall.py +2 -0
  66. smftools/informatics/archived/helpers/archived/ohe_batching.py +2 -0
  67. smftools/informatics/archived/helpers/archived/ohe_layers_decode.py +2 -0
  68. smftools/informatics/archived/helpers/archived/one_hot_decode.py +2 -0
  69. smftools/informatics/archived/helpers/archived/one_hot_encode.py +2 -0
  70. smftools/informatics/archived/helpers/archived/plot_bed_histograms.py +5 -1
  71. smftools/informatics/archived/helpers/archived/separate_bam_by_bc.py +2 -0
  72. smftools/informatics/archived/helpers/archived/split_and_index_BAM.py +2 -0
  73. smftools/informatics/archived/print_bam_query_seq.py +9 -1
  74. smftools/informatics/archived/subsample_fasta_from_bed.py +2 -0
  75. smftools/informatics/archived/subsample_pod5.py +2 -0
  76. smftools/informatics/bam_functions.py +1059 -269
  77. smftools/informatics/basecalling.py +53 -9
  78. smftools/informatics/bed_functions.py +357 -114
  79. smftools/informatics/binarize_converted_base_identities.py +21 -7
  80. smftools/informatics/complement_base_list.py +9 -6
  81. smftools/informatics/converted_BAM_to_adata.py +324 -137
  82. smftools/informatics/fasta_functions.py +251 -89
  83. smftools/informatics/h5ad_functions.py +202 -30
  84. smftools/informatics/modkit_extract_to_adata.py +623 -274
  85. smftools/informatics/modkit_functions.py +87 -44
  86. smftools/informatics/ohe.py +46 -21
  87. smftools/informatics/pod5_functions.py +114 -74
  88. smftools/informatics/run_multiqc.py +20 -14
  89. smftools/logging_utils.py +51 -0
  90. smftools/machine_learning/__init__.py +23 -12
  91. smftools/machine_learning/data/__init__.py +2 -0
  92. smftools/machine_learning/data/anndata_data_module.py +157 -50
  93. smftools/machine_learning/data/preprocessing.py +4 -1
  94. smftools/machine_learning/evaluation/__init__.py +3 -1
  95. smftools/machine_learning/evaluation/eval_utils.py +13 -14
  96. smftools/machine_learning/evaluation/evaluators.py +52 -34
  97. smftools/machine_learning/inference/__init__.py +3 -1
  98. smftools/machine_learning/inference/inference_utils.py +9 -4
  99. smftools/machine_learning/inference/lightning_inference.py +14 -13
  100. smftools/machine_learning/inference/sklearn_inference.py +8 -8
  101. smftools/machine_learning/inference/sliding_window_inference.py +37 -25
  102. smftools/machine_learning/models/__init__.py +12 -5
  103. smftools/machine_learning/models/base.py +34 -43
  104. smftools/machine_learning/models/cnn.py +22 -13
  105. smftools/machine_learning/models/lightning_base.py +78 -42
  106. smftools/machine_learning/models/mlp.py +18 -5
  107. smftools/machine_learning/models/positional.py +10 -4
  108. smftools/machine_learning/models/rnn.py +8 -3
  109. smftools/machine_learning/models/sklearn_models.py +46 -24
  110. smftools/machine_learning/models/transformer.py +75 -55
  111. smftools/machine_learning/models/wrappers.py +8 -3
  112. smftools/machine_learning/training/__init__.py +4 -2
  113. smftools/machine_learning/training/train_lightning_model.py +42 -23
  114. smftools/machine_learning/training/train_sklearn_model.py +11 -15
  115. smftools/machine_learning/utils/__init__.py +3 -1
  116. smftools/machine_learning/utils/device.py +12 -5
  117. smftools/machine_learning/utils/grl.py +8 -2
  118. smftools/metadata.py +443 -0
  119. smftools/optional_imports.py +31 -0
  120. smftools/plotting/__init__.py +32 -17
  121. smftools/plotting/autocorrelation_plotting.py +153 -48
  122. smftools/plotting/classifiers.py +175 -73
  123. smftools/plotting/general_plotting.py +350 -168
  124. smftools/plotting/hmm_plotting.py +53 -14
  125. smftools/plotting/position_stats.py +155 -87
  126. smftools/plotting/qc_plotting.py +25 -12
  127. smftools/preprocessing/__init__.py +35 -37
  128. smftools/preprocessing/append_base_context.py +105 -79
  129. smftools/preprocessing/append_binary_layer_by_base_context.py +75 -37
  130. smftools/preprocessing/{archives → archived}/add_read_length_and_mapping_qc.py +2 -0
  131. smftools/preprocessing/{archives → archived}/calculate_complexity.py +5 -1
  132. smftools/preprocessing/{archives → archived}/mark_duplicates.py +2 -0
  133. smftools/preprocessing/{archives → archived}/preprocessing.py +10 -6
  134. smftools/preprocessing/{archives → archived}/remove_duplicates.py +2 -0
  135. smftools/preprocessing/binarize.py +21 -4
  136. smftools/preprocessing/binarize_on_Youden.py +127 -31
  137. smftools/preprocessing/binary_layers_to_ohe.py +18 -11
  138. smftools/preprocessing/calculate_complexity_II.py +89 -59
  139. smftools/preprocessing/calculate_consensus.py +28 -19
  140. smftools/preprocessing/calculate_coverage.py +44 -22
  141. smftools/preprocessing/calculate_pairwise_differences.py +4 -1
  142. smftools/preprocessing/calculate_pairwise_hamming_distances.py +7 -3
  143. smftools/preprocessing/calculate_position_Youden.py +110 -55
  144. smftools/preprocessing/calculate_read_length_stats.py +52 -23
  145. smftools/preprocessing/calculate_read_modification_stats.py +91 -57
  146. smftools/preprocessing/clean_NaN.py +38 -28
  147. smftools/preprocessing/filter_adata_by_nan_proportion.py +24 -12
  148. smftools/preprocessing/filter_reads_on_length_quality_mapping.py +72 -37
  149. smftools/preprocessing/filter_reads_on_modification_thresholds.py +183 -73
  150. smftools/preprocessing/flag_duplicate_reads.py +708 -303
  151. smftools/preprocessing/invert_adata.py +26 -11
  152. smftools/preprocessing/load_sample_sheet.py +40 -22
  153. smftools/preprocessing/make_dirs.py +9 -3
  154. smftools/preprocessing/min_non_diagonal.py +4 -1
  155. smftools/preprocessing/recipes.py +58 -23
  156. smftools/preprocessing/reindex_references_adata.py +93 -27
  157. smftools/preprocessing/subsample_adata.py +33 -16
  158. smftools/readwrite.py +264 -109
  159. smftools/schema/__init__.py +11 -0
  160. smftools/schema/anndata_schema_v1.yaml +227 -0
  161. smftools/tools/__init__.py +25 -18
  162. smftools/tools/archived/apply_hmm.py +2 -0
  163. smftools/tools/archived/classifiers.py +165 -0
  164. smftools/tools/archived/classify_methylated_features.py +2 -0
  165. smftools/tools/archived/classify_non_methylated_features.py +2 -0
  166. smftools/tools/archived/subset_adata_v1.py +12 -1
  167. smftools/tools/archived/subset_adata_v2.py +14 -1
  168. smftools/tools/calculate_umap.py +56 -15
  169. smftools/tools/cluster_adata_on_methylation.py +122 -47
  170. smftools/tools/general_tools.py +70 -25
  171. smftools/tools/position_stats.py +220 -99
  172. smftools/tools/read_stats.py +50 -29
  173. smftools/tools/spatial_autocorrelation.py +365 -192
  174. smftools/tools/subset_adata.py +23 -21
  175. smftools-0.3.0.dist-info/METADATA +147 -0
  176. smftools-0.3.0.dist-info/RECORD +182 -0
  177. smftools-0.2.4.dist-info/METADATA +0 -141
  178. smftools-0.2.4.dist-info/RECORD +0 -176
  179. {smftools-0.2.4.dist-info → smftools-0.3.0.dist-info}/WHEEL +0 -0
  180. {smftools-0.2.4.dist-info → smftools-0.3.0.dist-info}/entry_points.txt +0 -0
  181. {smftools-0.2.4.dist-info → smftools-0.3.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,16 +1,30 @@
1
- def load_hmm(model_path, device='cpu'):
1
+ from __future__ import annotations
2
+
3
+ from smftools.optional_imports import require
4
+
5
+
6
+ def load_hmm(model_path, device="cpu"):
2
7
  """
3
8
  Reads in a pretrained HMM.
4
-
9
+
5
10
  Parameters:
6
11
  model_path (str): Path to a pretrained HMM
7
12
  """
8
- import torch
13
+ torch = require("torch", extra="torch", purpose="HMM read/write")
14
+
9
15
  # Load model using PyTorch
10
16
  hmm = torch.load(model_path)
11
- hmm.to(device)
17
+ hmm.to(device)
12
18
  return hmm
13
19
 
20
+
14
21
  def save_hmm(model, model_path):
15
- import torch
16
- torch.save(model, model_path)
22
+ """Save a pretrained HMM to disk.
23
+
24
+ Args:
25
+ model: HMM model instance.
26
+ model_path: Output path for the model.
27
+ """
28
+ torch = require("torch", extra="torch", purpose="HMM read/write")
29
+
30
+ torch.save(model, model_path)
@@ -1,4 +1,33 @@
1
- def refine_nucleosome_calls(adata, layer_name, nan_mask_layer, hexamer_size=120, octamer_size=147, max_wiggle=40, device="cpu"):
1
+ from __future__ import annotations
2
+
3
+ from smftools.logging_utils import get_logger
4
+
5
+ logger = get_logger(__name__)
6
+
7
+
8
+ def refine_nucleosome_calls(
9
+ adata,
10
+ layer_name,
11
+ nan_mask_layer,
12
+ hexamer_size=120,
13
+ octamer_size=147,
14
+ max_wiggle=40,
15
+ device="cpu",
16
+ ):
17
+ """Refine nucleosome calls into hexamer/octamer layers.
18
+
19
+ Args:
20
+ adata: AnnData with nucleosome calls.
21
+ layer_name: Layer containing initial nucleosome calls.
22
+ nan_mask_layer: Layer indicating NaN regions.
23
+ hexamer_size: Size for hexamer placement.
24
+ octamer_size: Size for octamer placement.
25
+ max_wiggle: Max boundary expansion into NaNs.
26
+ device: Device specifier (unused; kept for API parity).
27
+
28
+ Returns:
29
+ Updated AnnData with hexamer/octamer layers.
30
+ """
2
31
  import numpy as np
3
32
 
4
33
  nucleosome_layer = adata.layers[layer_name]
@@ -31,7 +60,10 @@ def refine_nucleosome_calls(adata, layer_name, nan_mask_layer, hexamer_size=120,
31
60
  break
32
61
  # Right
33
62
  for i in range(1, max_wiggle + 1):
34
- if end_idx + i < nucleosome_layer.shape[1] and nan_mask[read_idx, end_idx + i] == 1:
63
+ if (
64
+ end_idx + i < nucleosome_layer.shape[1]
65
+ and nan_mask[read_idx, end_idx + i] == 1
66
+ ):
35
67
  right_expand += 1
36
68
  else:
37
69
  break
@@ -40,26 +72,55 @@ def refine_nucleosome_calls(adata, layer_name, nan_mask_layer, hexamer_size=120,
40
72
  expanded_end = end_idx + right_expand
41
73
 
42
74
  available_size = expanded_end - expanded_start
43
-
75
+
44
76
  # Octamer placement
45
77
  if available_size >= octamer_size:
46
78
  center = (expanded_start + expanded_end) // 2
47
79
  half_oct = octamer_size // 2
48
- octamer_layer[read_idx, center - half_oct: center - half_oct + octamer_size] = 1
80
+ octamer_layer[
81
+ read_idx, center - half_oct : center - half_oct + octamer_size
82
+ ] = 1
49
83
 
50
84
  # Hexamer placement
51
85
  elif available_size >= hexamer_size:
52
86
  center = (expanded_start + expanded_end) // 2
53
87
  half_hex = hexamer_size // 2
54
- hexamer_layer[read_idx, center - half_hex: center - half_hex + hexamer_size] = 1
88
+ hexamer_layer[
89
+ read_idx, center - half_hex : center - half_hex + hexamer_size
90
+ ] = 1
55
91
 
56
92
  adata.layers[f"{layer_name}_hexamers"] = hexamer_layer
57
93
  adata.layers[f"{layer_name}_octamers"] = octamer_layer
58
94
 
59
- print(f"Added layers: {layer_name}_hexamers and {layer_name}_octamers")
95
+ logger.info("Added layers: %s_hexamers and %s_octamers", layer_name, layer_name)
60
96
  return adata
61
97
 
62
- def infer_nucleosomes_in_large_bound(adata, large_bound_layer, combined_nuc_layer, nan_mask_layer, nuc_size=147, linker_size=50, exclusion_buffer=30, device="cpu"):
98
+
99
+ def infer_nucleosomes_in_large_bound(
100
+ adata,
101
+ large_bound_layer,
102
+ combined_nuc_layer,
103
+ nan_mask_layer,
104
+ nuc_size=147,
105
+ linker_size=50,
106
+ exclusion_buffer=30,
107
+ device="cpu",
108
+ ):
109
+ """Infer nucleosomes in large-bound regions while respecting exclusions.
110
+
111
+ Args:
112
+ adata: AnnData with bound regions and existing nucleosomes.
113
+ large_bound_layer: Layer marking large-bound segments.
114
+ combined_nuc_layer: Layer with existing nucleosome calls.
115
+ nan_mask_layer: Layer indicating NaN regions.
116
+ nuc_size: Nucleosome size in bp.
117
+ linker_size: Minimum linker spacing.
118
+ exclusion_buffer: Buffer to avoid nearby existing nucleosomes.
119
+ device: Device specifier (unused; kept for API parity).
120
+
121
+ Returns:
122
+ Updated AnnData with inferred nucleosome layer.
123
+ """
63
124
  import numpy as np
64
125
 
65
126
  large_bound = adata.layers[large_bound_layer]
@@ -82,23 +143,52 @@ def infer_nucleosomes_in_large_bound(adata, large_bound_layer, combined_nuc_laye
82
143
 
83
144
  # Adjust boundaries into flanking NaN regions without getting too close to existing nucleosomes
84
145
  left_expand = start_idx
85
- while left_expand > 0 and nan_mask[read_idx, left_expand - 1] == 1 and np.sum(existing_nucs[read_idx, max(0, left_expand - exclusion_buffer):left_expand]) == 0:
146
+ while (
147
+ left_expand > 0
148
+ and nan_mask[read_idx, left_expand - 1] == 1
149
+ and np.sum(
150
+ existing_nucs[
151
+ read_idx, max(0, left_expand - exclusion_buffer) : left_expand
152
+ ]
153
+ )
154
+ == 0
155
+ ):
86
156
  left_expand -= 1
87
157
 
88
158
  right_expand = end_idx
89
- while right_expand < row.shape[0] and nan_mask[read_idx, right_expand] == 1 and np.sum(existing_nucs[read_idx, right_expand:min(row.shape[0], right_expand + exclusion_buffer)]) == 0:
159
+ while (
160
+ right_expand < row.shape[0]
161
+ and nan_mask[read_idx, right_expand] == 1
162
+ and np.sum(
163
+ existing_nucs[
164
+ read_idx,
165
+ right_expand : min(row.shape[0], right_expand + exclusion_buffer),
166
+ ]
167
+ )
168
+ == 0
169
+ ):
90
170
  right_expand += 1
91
171
 
92
172
  # Phase nucleosomes with linker spacing only
93
173
  region = (left_expand, right_expand)
94
174
  pos_cursor = region[0]
95
175
  while pos_cursor + nuc_size <= region[1]:
96
- if np.all((existing_nucs[read_idx, pos_cursor - exclusion_buffer:pos_cursor + nuc_size + exclusion_buffer] == 0)):
97
- inferred_layer[read_idx, pos_cursor:pos_cursor + nuc_size] = 1
98
- pos_cursor += nuc_size + linker_size
176
+ if np.all(
177
+ (
178
+ existing_nucs[
179
+ read_idx,
180
+ pos_cursor - exclusion_buffer : pos_cursor
181
+ + nuc_size
182
+ + exclusion_buffer,
183
+ ]
184
+ == 0
185
+ )
186
+ ):
187
+ inferred_layer[read_idx, pos_cursor : pos_cursor + nuc_size] = 1
188
+ pos_cursor += nuc_size + linker_size
99
189
  else:
100
190
  pos_cursor += 1
101
191
 
102
192
  adata.layers[f"{large_bound_layer}_phased_nucleosomes"] = inferred_layer
103
- print(f"Added layer: {large_bound_layer}_phased_nucleosomes")
104
- return adata
193
+ logger.info("Added layer: %s_phased_nucleosomes", large_bound_layer)
194
+ return adata
@@ -1,14 +1,56 @@
1
- from .bam_functions import align_and_sort_BAM, bam_qc, concatenate_fastqs_to_bam, count_aligned_reads, demux_and_index_BAM, extract_base_identities, extract_read_features_from_bam, extract_readnames_from_bam, separate_bam_by_bc, split_and_index_BAM
2
- from .basecalling import canoncall, modcall
3
- from .bed_functions import aligned_BAM_to_bed, _bed_to_bigwig, extract_read_lengths_from_bed, _plot_bed_histograms
4
- from .converted_BAM_to_adata import converted_BAM_to_adata
5
- from .fasta_functions import find_conversion_sites, generate_converted_FASTA, get_chromosome_lengths, get_native_references, index_fasta, subsample_fasta_from_bed
6
- from .h5ad_functions import add_demux_type_annotation, add_read_length_and_mapping_qc
7
- from .modkit_functions import extract_mods, make_modbed, modQC
8
- from .modkit_extract_to_adata import modkit_extract_to_adata
9
- from .ohe import one_hot_encode, one_hot_decode, ohe_layers_decode, ohe_batching
10
- from .pod5_functions import basecall_pod5s, fast5_to_pod5, subsample_pod5
11
- from .run_multiqc import run_multiqc
1
+ from __future__ import annotations
2
+
3
+ from importlib import import_module
4
+
5
+ _LAZY_ATTRS = {
6
+ "_bed_to_bigwig": "smftools.informatics.bed_functions",
7
+ "_plot_bed_histograms": "smftools.informatics.bed_functions",
8
+ "add_demux_type_annotation": "smftools.informatics.h5ad_functions",
9
+ "add_read_length_and_mapping_qc": "smftools.informatics.h5ad_functions",
10
+ "align_and_sort_BAM": "smftools.informatics.bam_functions",
11
+ "bam_qc": "smftools.informatics.bam_functions",
12
+ "basecall_pod5s": "smftools.informatics.pod5_functions",
13
+ "canoncall": "smftools.informatics.basecalling",
14
+ "concatenate_fastqs_to_bam": "smftools.informatics.bam_functions",
15
+ "converted_BAM_to_adata": "smftools.informatics.converted_BAM_to_adata",
16
+ "count_aligned_reads": "smftools.informatics.bam_functions",
17
+ "demux_and_index_BAM": "smftools.informatics.bam_functions",
18
+ "extract_base_identities": "smftools.informatics.bam_functions",
19
+ "extract_mods": "smftools.informatics.modkit_functions",
20
+ "extract_read_features_from_bam": "smftools.informatics.bam_functions",
21
+ "extract_read_lengths_from_bed": "smftools.informatics.bed_functions",
22
+ "extract_readnames_from_bam": "smftools.informatics.bam_functions",
23
+ "fast5_to_pod5": "smftools.informatics.pod5_functions",
24
+ "find_conversion_sites": "smftools.informatics.fasta_functions",
25
+ "generate_converted_FASTA": "smftools.informatics.fasta_functions",
26
+ "get_chromosome_lengths": "smftools.informatics.fasta_functions",
27
+ "get_native_references": "smftools.informatics.fasta_functions",
28
+ "index_fasta": "smftools.informatics.fasta_functions",
29
+ "make_modbed": "smftools.informatics.modkit_functions",
30
+ "modQC": "smftools.informatics.modkit_functions",
31
+ "modcall": "smftools.informatics.basecalling",
32
+ "modkit_extract_to_adata": "smftools.informatics.modkit_extract_to_adata",
33
+ "ohe_batching": "smftools.informatics.ohe",
34
+ "ohe_layers_decode": "smftools.informatics.ohe",
35
+ "one_hot_decode": "smftools.informatics.ohe",
36
+ "one_hot_encode": "smftools.informatics.ohe",
37
+ "run_multiqc": "smftools.informatics.run_multiqc",
38
+ "separate_bam_by_bc": "smftools.informatics.bam_functions",
39
+ "split_and_index_BAM": "smftools.informatics.bam_functions",
40
+ "subsample_fasta_from_bed": "smftools.informatics.fasta_functions",
41
+ "subsample_pod5": "smftools.informatics.pod5_functions",
42
+ "aligned_BAM_to_bed": "smftools.informatics.bed_functions",
43
+ }
44
+
45
+
46
+ def __getattr__(name: str):
47
+ if name in _LAZY_ATTRS:
48
+ module = import_module(_LAZY_ATTRS[name])
49
+ attr = getattr(module, name)
50
+ globals()[name] = attr
51
+ return attr
52
+ raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
53
+
12
54
 
13
55
  __all__ = [
14
56
  "basecall_pod5s",
@@ -16,5 +58,5 @@ __all__ = [
16
58
  "subsample_fasta_from_bed",
17
59
  "subsample_pod5",
18
60
  "fast5_to_pod5",
19
- "run_multiqc"
20
- ]
61
+ "run_multiqc",
62
+ ]
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  ## bam_conversion
2
4
 
3
5
  def bam_conversion(fasta, output_directory, conversion_types, strands, basecalled_path, split_dir, mapping_threshold, experiment_name, bam_suffix):
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  ## bam_direct
2
4
 
3
5
  def bam_direct(fasta, output_directory, mod_list, thresholds, bam_path, split_dir, mapping_threshold, experiment_name, bam_suffix, batch_size):
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  # basecall_pod5s
2
4
 
3
5
  def basecall_pod5s(config_path):
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  ## basecalls_to_adata
2
4
 
3
5
  def basecalls_to_adata(config_path):
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  ## conversion_smf
2
4
 
3
5
  def conversion_smf(fasta, output_directory, conversion_types, strands, model_dir, model, input_data_path, split_dir, barcode_kit, mapping_threshold, experiment_name, bam_suffix, basecall, barcode_both_ends, trim, device, make_bigwigs, threads, input_already_demuxed):
@@ -1,3 +1,4 @@
1
+ from __future__ import annotations
1
2
 
2
3
  def deaminase_smf(fasta, output_directory, conversion_types, strands, model_dir, model, input_data_path, split_dir, barcode_kit, mapping_threshold, experiment_name, bam_suffix, basecall, barcode_both_ends, trim, device, make_bigwigs, threads, input_already_demuxed):
3
4
  """
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  ## direct_smf
2
4
 
3
5
  def direct_smf(fasta, output_directory, mod_list, model_dir, model, thresholds, input_data_path, split_dir, barcode_kit, mapping_threshold, experiment_name, bam_suffix, batch_size, basecall, barcode_both_ends, trim, device, make_bigwigs, skip_unclassified, delete_batch_hdfs, threads):
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  from pathlib import Path
2
4
  import subprocess
3
5
  from typing import Union, List
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  # from .align_and_sort_BAM import align_and_sort_BAM
2
4
  # from .aligned_BAM_to_bed import aligned_BAM_to_bed
3
5
  # from .bam_qc import bam_qc
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  from pathlib import Path
2
4
  import os
3
5
  import subprocess
@@ -20,6 +22,13 @@ def _bam_to_fastq_with_pysam(bam_path: Union[str, Path], fastq_path: Union[str,
20
22
  fq.write(f"@{name}\n{seq}\n+\n{qual}\n")
21
23
 
22
24
  def _sort_bam_with_pysam(in_bam: Union[str, Path], out_bam: Union[str, Path], threads: Optional[int] = None) -> None:
25
+ """Sort a BAM file using pysam.
26
+
27
+ Args:
28
+ in_bam: Input BAM path.
29
+ out_bam: Output BAM path.
30
+ threads: Optional thread count.
31
+ """
23
32
  in_bam, out_bam = str(in_bam), str(out_bam)
24
33
  args = []
25
34
  if threads:
@@ -28,6 +37,12 @@ def _sort_bam_with_pysam(in_bam: Union[str, Path], out_bam: Union[str, Path], th
28
37
  pysam.sort(*args)
29
38
 
30
39
  def _index_bam_with_pysam(bam_path: Union[str, Path], threads: Optional[int] = None) -> None:
40
+ """Index a BAM file using pysam.
41
+
42
+ Args:
43
+ bam_path: BAM path to index.
44
+ threads: Optional thread count.
45
+ """
31
46
  bam_path = str(bam_path)
32
47
  # pysam.index supports samtools-style args
33
48
  if threads:
@@ -123,4 +138,4 @@ def align_and_sort_BAM(fasta,
123
138
  # index_command = ["samtools", "index", "-@", threads, aligned_sorted_output]
124
139
  # else:
125
140
  # index_command = ["samtools", "index", aligned_sorted_output]
126
- # subprocess.run(index_command)
141
+ # subprocess.run(index_command)
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  def aligned_BAM_to_bed(aligned_BAM, out_dir, fasta, make_bigwigs, threads=None):
2
4
  """
3
5
  Takes an aligned BAM as input and writes a BED file of reads as output.
@@ -35,6 +35,7 @@ def bam_qc(
35
35
  bam_files = [Path(b) for b in bam_files]
36
36
 
37
37
  def _has_index(p: Path) -> bool:
38
+ """Return True if a BAM/CRAM index exists for the path."""
38
39
  if p.suffix.lower() == ".bam":
39
40
  bai = p.with_suffix(p.suffix + ".bai")
40
41
  bai_alt = Path(str(p) + ".bai")
@@ -45,6 +46,7 @@ def bam_qc(
45
46
  return False
46
47
 
47
48
  def _ensure_index(p: Path) -> None:
49
+ """Ensure a BAM/CRAM index exists, creating one if needed."""
48
50
  if _has_index(p):
49
51
  return
50
52
  if HAVE_PYSAM:
@@ -55,6 +57,14 @@ def bam_qc(
55
57
  subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
56
58
 
57
59
  def _run_one(bam: Path) -> Tuple[Path, List[Tuple[str, int]]]:
60
+ """Run QC tasks for a single BAM file.
61
+
62
+ Args:
63
+ bam: Path to the BAM file.
64
+
65
+ Returns:
66
+ Tuple of (bam_path, list of (task_name, return_code)).
67
+ """
58
68
  # outputs + return (file, [(task_name, returncode)])
59
69
  results: List[Tuple[str, int]] = []
60
70
  base = bam.stem # filename without .bam
@@ -71,6 +81,7 @@ def bam_qc(
71
81
 
72
82
  # Choose runner per task
73
83
  def run_stats():
84
+ """Run stats collection for a BAM file."""
74
85
  if not stats:
75
86
  return
76
87
  if HAVE_PYSAM and hasattr(pysam, "stats"):
@@ -86,6 +97,7 @@ def bam_qc(
86
97
  raise RuntimeError(cp.stderr.decode(errors="replace"))
87
98
 
88
99
  def run_flagstat():
100
+ """Run flagstat collection for a BAM file."""
89
101
  if not flagstats:
90
102
  return
91
103
  if HAVE_PYSAM and hasattr(pysam, "flagstat"):
@@ -101,6 +113,7 @@ def bam_qc(
101
113
  raise RuntimeError(cp.stderr.decode(errors="replace"))
102
114
 
103
115
  def run_idxstats():
116
+ """Run idxstats collection for a BAM file."""
104
117
  if not idxstats:
105
118
  return
106
119
  if HAVE_PYSAM and hasattr(pysam, "idxstats"):
@@ -210,4 +223,4 @@ def bam_qc(
210
223
  # elif modality == 'direct':
211
224
  # pass
212
225
 
213
- # print("QC processing completed.")
226
+ # print("QC processing completed.")
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  from pathlib import Path
2
4
  import pybedtools
3
5
  import pyBigWig
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  ## canoncall
2
4
 
3
5
  # Conversion SMF specific
@@ -60,6 +60,7 @@ def concatenate_fastqs_to_bam(
60
60
  return p.stem # fallback: remove last suffix only
61
61
 
62
62
  def _extract_barcode_from_filename(p: Path) -> str:
63
+ """Extract a barcode token from a FASTQ filename."""
63
64
  stem = _strip_fastq_ext(p)
64
65
  if "_" in stem:
65
66
  token = stem.split("_")[-1]
@@ -68,6 +69,7 @@ def concatenate_fastqs_to_bam(
68
69
  return stem
69
70
 
70
71
  def _classify_read_token(stem: str) -> Tuple[Optional[str], Optional[int]]:
72
+ """Classify a FASTQ filename stem into (prefix, read_number)."""
71
73
  # return (prefix, readnum) if matches; else (None, None)
72
74
  patterns = [
73
75
  r"(?i)(.*?)[._-]r?([12])$", # prefix_R1 / prefix.r2 / prefix-1
@@ -80,6 +82,7 @@ def concatenate_fastqs_to_bam(
80
82
  return None, None
81
83
 
82
84
  def _pair_by_filename(paths: List[Path]) -> Tuple[List[Tuple[Path, Path]], List[Path]]:
85
+ """Pair FASTQ files based on filename conventions."""
83
86
  pref_map: Dict[str, Dict[int, Path]] = {}
84
87
  unpaired: List[Path] = []
85
88
  for pth in paths:
@@ -101,6 +104,7 @@ def concatenate_fastqs_to_bam(
101
104
  return pairs, leftovers
102
105
 
103
106
  def _fastq_iter(p: Path):
107
+ """Yield FASTQ records using pysam.FastxFile."""
104
108
  # pysam.FastxFile handles compressed extensions transparently
105
109
  with pysam.FastxFile(str(p)) as fx:
106
110
  for rec in fx:
@@ -114,6 +118,7 @@ def concatenate_fastqs_to_bam(
114
118
  read1: bool,
115
119
  read2: bool,
116
120
  ) -> pysam.AlignedSegment:
121
+ """Construct an unaligned pysam.AlignedSegment."""
117
122
  a = pysam.AlignedSegment()
118
123
  a.query_name = name
119
124
  a.query_sequence = seq
@@ -136,6 +141,7 @@ def concatenate_fastqs_to_bam(
136
141
 
137
142
  # ---------- normalize inputs to Path ----------
138
143
  def _to_path_pair(x) -> Tuple[Path, Path]:
144
+ """Convert a tuple of path-like objects to Path instances."""
139
145
  a, b = x
140
146
  return Path(a), Path(b)
141
147
 
@@ -205,6 +211,7 @@ def concatenate_fastqs_to_bam(
205
211
 
206
212
  for rec1, rec2 in zip_longest(it1, it2, fillvalue=None):
207
213
  def _clean(n: Optional[str]) -> Optional[str]:
214
+ """Normalize FASTQ read names by trimming read suffixes."""
208
215
  if n is None:
209
216
  return None
210
217
  return re.sub(r"(?:/1$|/2$|\s[12]$)", "", n)
@@ -256,4 +263,4 @@ def concatenate_fastqs_to_bam(
256
263
  "paired_pairs_written": paired_pairs_written,
257
264
  "singletons_written": singletons_written,
258
265
  "barcodes": barcodes_in_order,
259
- }
266
+ }
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  ## converted_BAM_to_adata
2
4
 
3
5
  def converted_BAM_to_adata(converted_FASTA, split_dir, mapping_threshold, experiment_name, conversion_types, bam_suffix):
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  ## count_aligned_reads
2
4
 
3
5
  # General
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  ## demux_and_index_BAM
2
4
 
3
5
  def demux_and_index_BAM(aligned_sorted_BAM, split_dir, bam_suffix, barcode_kit, barcode_both_ends, trim, fasta, make_bigwigs, threads):
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  def extract_base_identities(bam_file, chromosome, positions, max_reference_length, sequence):
2
4
  """
3
5
  Efficiently extracts base identities from mapped reads with reference coordinates.
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  ## extract_mods
2
4
 
3
5
  def extract_mods(thresholds, mod_tsv_dir, split_dir, bam_suffix, skip_unclassified=True, modkit_summary=False, threads=None):
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  # extract_read_features_from_bam
2
4
 
3
5
  def extract_read_features_from_bam(bam_file_path):
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  # extract_read_lengths_from_bed
2
4
 
3
5
  def extract_read_lengths_from_bed(file_path):
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  # extract_readnames_from_BAM
2
4
 
3
5
  def extract_readnames_from_BAM(aligned_BAM):
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  def find_conversion_sites(fasta_file, modification_type, conversions, deaminase_footprinting=False):
2
4
  """
3
5
  Finds genomic coordinates of modified bases (5mC or 6mA) in a reference FASTA file.
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  import numpy as np
2
4
  import gzip
3
5
  import os
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  # get_chromosome_lengths
2
4
 
3
5
  def get_chromosome_lengths(fasta):
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  ## get_native_references
2
4
 
3
5
  # Direct methylation specific
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  import pysam
2
4
  from pathlib import Path
3
5
 
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  ## fasta_module
2
4
  from .. import readwrite
3
5
  # bioinformatic operations
@@ -1,12 +1,14 @@
1
+ from __future__ import annotations
2
+
1
3
  # load_adata
2
4
  ######################################################################################################
3
- import .utils
5
+ # Archived helper; legacy imports removed for syntax compatibility.
4
6
  # File I/O
5
7
  import subprocess
6
8
  import gc
7
9
 
8
10
  # bioinformatic operations
9
- import .informatics_module
11
+ # import .informatics_module
10
12
 
11
13
  # User interface
12
14
  from tqdm import tqdm
@@ -513,4 +515,4 @@ def modkit_extract_to_adata(fasta, bam, mapping_threshold, experiment_name, mods
513
515
  print(f"Deleted file: {hdf}")
514
516
  except OSError as e:
515
517
  print(f"Error deleting file {hdf}: {e}")
516
- ######################################################################################################
518
+ ######################################################################################################
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  ## make_modbed
2
4
 
3
5
  # Direct SMF