PyPI - smftools - Versions diffs - 0.2.3__py3-none-any.whl → 0.2.5__py3-none-any.whl - Mend

smftools 0.2.3__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (137)

smftools/__init__.py +6 -8
smftools/_settings.py +4 -6
smftools/_version.py +1 -1
smftools/cli/helpers.py +54 -0
smftools/cli/hmm_adata.py +937 -256
smftools/cli/load_adata.py +448 -268
smftools/cli/preprocess_adata.py +469 -263
smftools/cli/spatial_adata.py +536 -319
smftools/cli_entry.py +97 -182
smftools/config/__init__.py +1 -1
smftools/config/conversion.yaml +17 -6
smftools/config/deaminase.yaml +12 -10
smftools/config/default.yaml +142 -33
smftools/config/direct.yaml +11 -3
smftools/config/discover_input_files.py +19 -5
smftools/config/experiment_config.py +594 -264
smftools/constants.py +37 -0
smftools/datasets/__init__.py +2 -8
smftools/datasets/datasets.py +32 -18
smftools/hmm/HMM.py +2128 -1418
smftools/hmm/__init__.py +2 -9
smftools/hmm/archived/call_hmm_peaks.py +121 -0
smftools/hmm/call_hmm_peaks.py +299 -91
smftools/hmm/display_hmm.py +19 -6
smftools/hmm/hmm_readwrite.py +13 -4
smftools/hmm/nucleosome_hmm_refinement.py +102 -14
smftools/informatics/__init__.py +30 -7
smftools/informatics/archived/helpers/archived/align_and_sort_BAM.py +14 -1
smftools/informatics/archived/helpers/archived/bam_qc.py +14 -1
smftools/informatics/archived/helpers/archived/concatenate_fastqs_to_bam.py +8 -1
smftools/informatics/archived/helpers/archived/load_adata.py +3 -3
smftools/informatics/archived/helpers/archived/plot_bed_histograms.py +3 -1
smftools/informatics/archived/print_bam_query_seq.py +7 -1
smftools/informatics/bam_functions.py +397 -175
smftools/informatics/basecalling.py +51 -9
smftools/informatics/bed_functions.py +90 -57
smftools/informatics/binarize_converted_base_identities.py +18 -7
smftools/informatics/complement_base_list.py +7 -6
smftools/informatics/converted_BAM_to_adata.py +265 -122
smftools/informatics/fasta_functions.py +161 -83
smftools/informatics/h5ad_functions.py +196 -30
smftools/informatics/modkit_extract_to_adata.py +609 -270
smftools/informatics/modkit_functions.py +85 -44
smftools/informatics/ohe.py +44 -21
smftools/informatics/pod5_functions.py +112 -73
smftools/informatics/run_multiqc.py +20 -14
smftools/logging_utils.py +51 -0
smftools/machine_learning/__init__.py +2 -7
smftools/machine_learning/data/anndata_data_module.py +143 -50
smftools/machine_learning/data/preprocessing.py +2 -1
smftools/machine_learning/evaluation/__init__.py +1 -1
smftools/machine_learning/evaluation/eval_utils.py +11 -14
smftools/machine_learning/evaluation/evaluators.py +46 -33
smftools/machine_learning/inference/__init__.py +1 -1
smftools/machine_learning/inference/inference_utils.py +7 -4
smftools/machine_learning/inference/lightning_inference.py +9 -13
smftools/machine_learning/inference/sklearn_inference.py +6 -8
smftools/machine_learning/inference/sliding_window_inference.py +35 -25
smftools/machine_learning/models/__init__.py +10 -5
smftools/machine_learning/models/base.py +28 -42
smftools/machine_learning/models/cnn.py +15 -11
smftools/machine_learning/models/lightning_base.py +71 -40
smftools/machine_learning/models/mlp.py +13 -4
smftools/machine_learning/models/positional.py +3 -2
smftools/machine_learning/models/rnn.py +3 -2
smftools/machine_learning/models/sklearn_models.py +39 -22
smftools/machine_learning/models/transformer.py +68 -53
smftools/machine_learning/models/wrappers.py +2 -1
smftools/machine_learning/training/__init__.py +2 -2
smftools/machine_learning/training/train_lightning_model.py +29 -20
smftools/machine_learning/training/train_sklearn_model.py +9 -15
smftools/machine_learning/utils/__init__.py +1 -1
smftools/machine_learning/utils/device.py +7 -4
smftools/machine_learning/utils/grl.py +3 -1
smftools/metadata.py +443 -0
smftools/plotting/__init__.py +19 -5
smftools/plotting/autocorrelation_plotting.py +145 -44
smftools/plotting/classifiers.py +162 -72
smftools/plotting/general_plotting.py +422 -197
smftools/plotting/hmm_plotting.py +42 -13
smftools/plotting/position_stats.py +147 -87
smftools/plotting/qc_plotting.py +20 -12
smftools/preprocessing/__init__.py +10 -12
smftools/preprocessing/append_base_context.py +115 -80
smftools/preprocessing/append_binary_layer_by_base_context.py +77 -39
smftools/preprocessing/{calculate_complexity.py → archived/calculate_complexity.py} +3 -1
smftools/preprocessing/{archives → archived}/preprocessing.py +8 -6
smftools/preprocessing/binarize.py +21 -4
smftools/preprocessing/binarize_on_Youden.py +129 -31
smftools/preprocessing/binary_layers_to_ohe.py +17 -11
smftools/preprocessing/calculate_complexity_II.py +86 -59
smftools/preprocessing/calculate_consensus.py +28 -19
smftools/preprocessing/calculate_coverage.py +50 -25
smftools/preprocessing/calculate_pairwise_differences.py +2 -1
smftools/preprocessing/calculate_pairwise_hamming_distances.py +4 -3
smftools/preprocessing/calculate_position_Youden.py +118 -54
smftools/preprocessing/calculate_read_length_stats.py +52 -23
smftools/preprocessing/calculate_read_modification_stats.py +91 -57
smftools/preprocessing/clean_NaN.py +38 -28
smftools/preprocessing/filter_adata_by_nan_proportion.py +24 -12
smftools/preprocessing/filter_reads_on_length_quality_mapping.py +71 -38
smftools/preprocessing/filter_reads_on_modification_thresholds.py +181 -73
smftools/preprocessing/flag_duplicate_reads.py +689 -272
smftools/preprocessing/invert_adata.py +26 -11
smftools/preprocessing/load_sample_sheet.py +40 -22
smftools/preprocessing/make_dirs.py +8 -3
smftools/preprocessing/min_non_diagonal.py +2 -1
smftools/preprocessing/recipes.py +56 -23
smftools/preprocessing/reindex_references_adata.py +103 -0
smftools/preprocessing/subsample_adata.py +33 -16
smftools/readwrite.py +331 -82
smftools/schema/__init__.py +11 -0
smftools/schema/anndata_schema_v1.yaml +227 -0
smftools/tools/__init__.py +3 -4
smftools/tools/archived/classifiers.py +163 -0
smftools/tools/archived/subset_adata_v1.py +10 -1
smftools/tools/archived/subset_adata_v2.py +12 -1
smftools/tools/calculate_umap.py +54 -15
smftools/tools/cluster_adata_on_methylation.py +115 -46
smftools/tools/general_tools.py +70 -25
smftools/tools/position_stats.py +229 -98
smftools/tools/read_stats.py +50 -29
smftools/tools/spatial_autocorrelation.py +365 -192
smftools/tools/subset_adata.py +23 -21
{smftools-0.2.3.dist-info → smftools-0.2.5.dist-info}/METADATA +17 -39
smftools-0.2.5.dist-info/RECORD +181 -0
smftools-0.2.3.dist-info/RECORD +0 -173
/smftools/cli/{cli_flows.py → archived/cli_flows.py} +0 -0
/smftools/hmm/{apply_hmm_batched.py → archived/apply_hmm_batched.py} +0 -0
/smftools/hmm/{calculate_distances.py → archived/calculate_distances.py} +0 -0
/smftools/hmm/{train_hmm.py → archived/train_hmm.py} +0 -0
/smftools/preprocessing/{add_read_length_and_mapping_qc.py → archived/add_read_length_and_mapping_qc.py} +0 -0
/smftools/preprocessing/{archives → archived}/mark_duplicates.py +0 -0
/smftools/preprocessing/{archives → archived}/remove_duplicates.py +0 -0
{smftools-0.2.3.dist-info → smftools-0.2.5.dist-info}/WHEEL +0 -0
{smftools-0.2.3.dist-info → smftools-0.2.5.dist-info}/entry_points.txt +0 -0
{smftools-0.2.3.dist-info → smftools-0.2.5.dist-info}/licenses/LICENSE +0 -0

smftools/hmm/nucleosome_hmm_refinement.py CHANGED Viewed

@@ -1,4 +1,31 @@
-def refine_nucleosome_calls(adata, layer_name, nan_mask_layer, hexamer_size=120, octamer_size=147, max_wiggle=40, device="cpu"):
+from smftools.logging_utils import get_logger
+logger = get_logger(__name__)
+def refine_nucleosome_calls(
+    adata,
+    layer_name,
+    nan_mask_layer,
+    hexamer_size=120,
+    octamer_size=147,
+    max_wiggle=40,
+    device="cpu",
+):
+    """Refine nucleosome calls into hexamer/octamer layers.
+    Args:
+        adata: AnnData with nucleosome calls.
+        layer_name: Layer containing initial nucleosome calls.
+        nan_mask_layer: Layer indicating NaN regions.
+        hexamer_size: Size for hexamer placement.
+        octamer_size: Size for octamer placement.
+        max_wiggle: Max boundary expansion into NaNs.
+        device: Device specifier (unused; kept for API parity).
+    Returns:
+        Updated AnnData with hexamer/octamer layers.
+    """
     import numpy as np
     nucleosome_layer = adata.layers[layer_name]
@@ -31,7 +58,10 @@ def refine_nucleosome_calls(adata, layer_name, nan_mask_layer, hexamer_size=120,
                         break
                 # Right
                 for i in range(1, max_wiggle + 1):
-                    if end_idx + i < nucleosome_layer.shape[1] and nan_mask[read_idx, end_idx + i] == 1:
+                    if (
+                        end_idx + i < nucleosome_layer.shape[1]
+                        and nan_mask[read_idx, end_idx + i] == 1
+                    ):
                         right_expand += 1
                     else:
                         break
@@ -40,26 +70,55 @@ def refine_nucleosome_calls(adata, layer_name, nan_mask_layer, hexamer_size=120,
                 expanded_end = end_idx + right_expand
                 available_size = expanded_end - expanded_start
                 # Octamer placement
                 if available_size >= octamer_size:
                     center = (expanded_start + expanded_end) // 2
                     half_oct = octamer_size // 2
-                    octamer_layer[read_idx, center - half_oct: center - half_oct + octamer_size] = 1
+                    octamer_layer[
+                        read_idx, center - half_oct : center - half_oct + octamer_size
+                    ] = 1
                 # Hexamer placement
                 elif available_size >= hexamer_size:
                     center = (expanded_start + expanded_end) // 2
                     half_hex = hexamer_size // 2
-                    hexamer_layer[read_idx, center - half_hex: center - half_hex + hexamer_size] = 1
+                    hexamer_layer[
+                        read_idx, center - half_hex : center - half_hex + hexamer_size
+                    ] = 1
     adata.layers[f"{layer_name}_hexamers"] = hexamer_layer
     adata.layers[f"{layer_name}_octamers"] = octamer_layer
-    print(f"Added layers: {layer_name}_hexamers and {layer_name}_octamers")
+    logger.info("Added layers: %s_hexamers and %s_octamers", layer_name, layer_name)
     return adata
-def infer_nucleosomes_in_large_bound(adata, large_bound_layer, combined_nuc_layer, nan_mask_layer, nuc_size=147, linker_size=50, exclusion_buffer=30, device="cpu"):
+def infer_nucleosomes_in_large_bound(
+    adata,
+    large_bound_layer,
+    combined_nuc_layer,
+    nan_mask_layer,
+    nuc_size=147,
+    linker_size=50,
+    exclusion_buffer=30,
+    device="cpu",
+):
+    """Infer nucleosomes in large-bound regions while respecting exclusions.
+    Args:
+        adata: AnnData with bound regions and existing nucleosomes.
+        large_bound_layer: Layer marking large-bound segments.
+        combined_nuc_layer: Layer with existing nucleosome calls.
+        nan_mask_layer: Layer indicating NaN regions.
+        nuc_size: Nucleosome size in bp.
+        linker_size: Minimum linker spacing.
+        exclusion_buffer: Buffer to avoid nearby existing nucleosomes.
+        device: Device specifier (unused; kept for API parity).
+    Returns:
+        Updated AnnData with inferred nucleosome layer.
+    """
     import numpy as np
     large_bound = adata.layers[large_bound_layer]
@@ -82,23 +141,52 @@ def infer_nucleosomes_in_large_bound(adata, large_bound_layer, combined_nuc_laye
                 # Adjust boundaries into flanking NaN regions without getting too close to existing nucleosomes
                 left_expand = start_idx
-                while left_expand > 0 and nan_mask[read_idx, left_expand - 1] == 1 and np.sum(existing_nucs[read_idx, max(0, left_expand - exclusion_buffer):left_expand]) == 0:
+                while (
+                    left_expand > 0
+                    and nan_mask[read_idx, left_expand - 1] == 1
+                    and np.sum(
+                        existing_nucs[
+                            read_idx, max(0, left_expand - exclusion_buffer) : left_expand
+                        ]
+                    )
+                    == 0
+                ):
                     left_expand -= 1
                 right_expand = end_idx
-                while right_expand < row.shape[0] and nan_mask[read_idx, right_expand] == 1 and np.sum(existing_nucs[read_idx, right_expand:min(row.shape[0], right_expand + exclusion_buffer)]) == 0:
+                while (
+                    right_expand < row.shape[0]
+                    and nan_mask[read_idx, right_expand] == 1
+                    and np.sum(
+                        existing_nucs[
+                            read_idx,
+                            right_expand : min(row.shape[0], right_expand + exclusion_buffer),
+                        ]
+                    )
+                    == 0
+                ):
                     right_expand += 1
                 # Phase nucleosomes with linker spacing only
                 region = (left_expand, right_expand)
                 pos_cursor = region[0]
                 while pos_cursor + nuc_size <= region[1]:
-                    if np.all((existing_nucs[read_idx, pos_cursor - exclusion_buffer:pos_cursor + nuc_size + exclusion_buffer] == 0)):
-                        inferred_layer[read_idx, pos_cursor:pos_cursor + nuc_size] = 1
-                        pos_cursor += nuc_size + linker_size
+                    if np.all(
+                        (
+                            existing_nucs[
+                                read_idx,
+                                pos_cursor - exclusion_buffer : pos_cursor
+                                + nuc_size
+                                + exclusion_buffer,
+                            ]
+                            == 0
+                        )
+                    ):
+                        inferred_layer[read_idx, pos_cursor : pos_cursor + nuc_size] = 1
+                        pos_cursor += nuc_size + linker_size
                     else:
                         pos_cursor += 1
     adata.layers[f"{large_bound_layer}_phased_nucleosomes"] = inferred_layer
-    print(f"Added layer: {large_bound_layer}_phased_nucleosomes")
-    return adata
+    logger.info("Added layer: %s_phased_nucleosomes", large_bound_layer)
+    return adata

smftools/informatics/__init__.py CHANGED Viewed

@@ -1,12 +1,35 @@
-from .bam_functions import align_and_sort_BAM, bam_qc, concatenate_fastqs_to_bam, count_aligned_reads, demux_and_index_BAM, extract_base_identities, extract_read_features_from_bam, extract_readnames_from_bam, separate_bam_by_bc, split_and_index_BAM
+from .bam_functions import (
+    align_and_sort_BAM,
+    bam_qc,
+    concatenate_fastqs_to_bam,
+    count_aligned_reads,
+    demux_and_index_BAM,
+    extract_base_identities,
+    extract_read_features_from_bam,
+    extract_readnames_from_bam,
+    separate_bam_by_bc,
+    split_and_index_BAM,
+)
 from .basecalling import canoncall, modcall
-from .bed_functions import aligned_BAM_to_bed, _bed_to_bigwig, extract_read_lengths_from_bed, _plot_bed_histograms
+from .bed_functions import (
+    _bed_to_bigwig,
+    _plot_bed_histograms,
+    aligned_BAM_to_bed,
+    extract_read_lengths_from_bed,
+)
 from .converted_BAM_to_adata import converted_BAM_to_adata
-from .fasta_functions import find_conversion_sites, generate_converted_FASTA, get_chromosome_lengths, get_native_references, index_fasta, subsample_fasta_from_bed
+from .fasta_functions import (
+    find_conversion_sites,
+    generate_converted_FASTA,
+    get_chromosome_lengths,
+    get_native_references,
+    index_fasta,
+    subsample_fasta_from_bed,
+)
 from .h5ad_functions import add_demux_type_annotation, add_read_length_and_mapping_qc
-from .modkit_functions import extract_mods, make_modbed, modQC
 from .modkit_extract_to_adata import modkit_extract_to_adata
-from .ohe import one_hot_encode, one_hot_decode, ohe_layers_decode, ohe_batching
+from .modkit_functions import extract_mods, make_modbed, modQC
+from .ohe import ohe_batching, ohe_layers_decode, one_hot_decode, one_hot_encode
 from .pod5_functions import basecall_pod5s, fast5_to_pod5, subsample_pod5
 from .run_multiqc import run_multiqc
@@ -16,5 +39,5 @@ __all__ = [
     "subsample_fasta_from_bed",
     "subsample_pod5",
     "fast5_to_pod5",
-    "run_multiqc"
-]
+    "run_multiqc",
+]

smftools/informatics/archived/helpers/archived/align_and_sort_BAM.py CHANGED Viewed

@@ -20,6 +20,13 @@ def _bam_to_fastq_with_pysam(bam_path: Union[str, Path], fastq_path: Union[str,
             fq.write(f"@{name}\n{seq}\n+\n{qual}\n")
 def _sort_bam_with_pysam(in_bam: Union[str, Path], out_bam: Union[str, Path], threads: Optional[int] = None) -> None:
+    """Sort a BAM file using pysam.
+    Args:
+        in_bam: Input BAM path.
+        out_bam: Output BAM path.
+        threads: Optional thread count.
+    """
     in_bam, out_bam = str(in_bam), str(out_bam)
     args = []
     if threads:
@@ -28,6 +35,12 @@ def _sort_bam_with_pysam(in_bam: Union[str, Path], out_bam: Union[str, Path], th
     pysam.sort(*args)
 def _index_bam_with_pysam(bam_path: Union[str, Path], threads: Optional[int] = None) -> None:
+    """Index a BAM file using pysam.
+    Args:
+        bam_path: BAM path to index.
+        threads: Optional thread count.
+    """
     bam_path = str(bam_path)
     # pysam.index supports samtools-style args
     if threads:
@@ -123,4 +136,4 @@ def align_and_sort_BAM(fasta,
     #     index_command = ["samtools", "index", "-@", threads, aligned_sorted_output]
     # else:
     #     index_command = ["samtools", "index", aligned_sorted_output]
-    # subprocess.run(index_command)
+    # subprocess.run(index_command)

smftools/informatics/archived/helpers/archived/bam_qc.py CHANGED Viewed

@@ -35,6 +35,7 @@ def bam_qc(
     bam_files = [Path(b) for b in bam_files]
     def _has_index(p: Path) -> bool:
+        """Return True if a BAM/CRAM index exists for the path."""
         if p.suffix.lower() == ".bam":
             bai = p.with_suffix(p.suffix + ".bai")
             bai_alt = Path(str(p) + ".bai")
@@ -45,6 +46,7 @@ def bam_qc(
         return False
     def _ensure_index(p: Path) -> None:
+        """Ensure a BAM/CRAM index exists, creating one if needed."""
         if _has_index(p):
             return
         if HAVE_PYSAM:
@@ -55,6 +57,14 @@ def bam_qc(
             subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
     def _run_one(bam: Path) -> Tuple[Path, List[Tuple[str, int]]]:
+        """Run QC tasks for a single BAM file.
+        Args:
+            bam: Path to the BAM file.
+        Returns:
+            Tuple of (bam_path, list of (task_name, return_code)).
+        """
         # outputs + return (file, [(task_name, returncode)])
         results: List[Tuple[str, int]] = []
         base = bam.stem  # filename without .bam
@@ -71,6 +81,7 @@ def bam_qc(
         # Choose runner per task
         def run_stats():
+            """Run stats collection for a BAM file."""
             if not stats:
                 return
             if HAVE_PYSAM and hasattr(pysam, "stats"):
@@ -86,6 +97,7 @@ def bam_qc(
                     raise RuntimeError(cp.stderr.decode(errors="replace"))
         def run_flagstat():
+            """Run flagstat collection for a BAM file."""
             if not flagstats:
                 return
             if HAVE_PYSAM and hasattr(pysam, "flagstat"):
@@ -101,6 +113,7 @@ def bam_qc(
                     raise RuntimeError(cp.stderr.decode(errors="replace"))
         def run_idxstats():
+            """Run idxstats collection for a BAM file."""
             if not idxstats:
                 return
             if HAVE_PYSAM and hasattr(pysam, "idxstats"):
@@ -210,4 +223,4 @@ def bam_qc(
 #         elif modality == 'direct':
 #             pass
-#     print("QC processing completed.")
+#     print("QC processing completed.")

smftools/informatics/archived/helpers/archived/concatenate_fastqs_to_bam.py CHANGED Viewed

@@ -60,6 +60,7 @@ def concatenate_fastqs_to_bam(
         return p.stem  # fallback: remove last suffix only
     def _extract_barcode_from_filename(p: Path) -> str:
+        """Extract a barcode token from a FASTQ filename."""
         stem = _strip_fastq_ext(p)
         if "_" in stem:
             token = stem.split("_")[-1]
@@ -68,6 +69,7 @@ def concatenate_fastqs_to_bam(
         return stem
     def _classify_read_token(stem: str) -> Tuple[Optional[str], Optional[int]]:
+        """Classify a FASTQ filename stem into (prefix, read_number)."""
         # return (prefix, readnum) if matches; else (None, None)
         patterns = [
             r"(?i)(.*?)[._-]r?([12])$",        # prefix_R1 / prefix.r2 / prefix-1
@@ -80,6 +82,7 @@ def concatenate_fastqs_to_bam(
         return None, None
     def _pair_by_filename(paths: List[Path]) -> Tuple[List[Tuple[Path, Path]], List[Path]]:
+        """Pair FASTQ files based on filename conventions."""
         pref_map: Dict[str, Dict[int, Path]] = {}
         unpaired: List[Path] = []
         for pth in paths:
@@ -101,6 +104,7 @@ def concatenate_fastqs_to_bam(
         return pairs, leftovers
     def _fastq_iter(p: Path):
+        """Yield FASTQ records using pysam.FastxFile."""
         # pysam.FastxFile handles compressed extensions transparently
         with pysam.FastxFile(str(p)) as fx:
             for rec in fx:
@@ -114,6 +118,7 @@ def concatenate_fastqs_to_bam(
         read1: bool,
         read2: bool,
     ) -> pysam.AlignedSegment:
+        """Construct an unaligned pysam.AlignedSegment."""
         a = pysam.AlignedSegment()
         a.query_name = name
         a.query_sequence = seq
@@ -136,6 +141,7 @@ def concatenate_fastqs_to_bam(
     # ---------- normalize inputs to Path ----------
     def _to_path_pair(x) -> Tuple[Path, Path]:
+        """Convert a tuple of path-like objects to Path instances."""
         a, b = x
         return Path(a), Path(b)
@@ -205,6 +211,7 @@ def concatenate_fastqs_to_bam(
             for rec1, rec2 in zip_longest(it1, it2, fillvalue=None):
                 def _clean(n: Optional[str]) -> Optional[str]:
+                    """Normalize FASTQ read names by trimming read suffixes."""
                     if n is None:
                         return None
                     return re.sub(r"(?:/1$|/2$|\s[12]$)", "", n)
@@ -256,4 +263,4 @@ def concatenate_fastqs_to_bam(
         "paired_pairs_written": paired_pairs_written,
         "singletons_written": singletons_written,
         "barcodes": barcodes_in_order,
-    }
+    }

smftools/informatics/archived/helpers/archived/load_adata.py CHANGED Viewed

@@ -1,12 +1,12 @@
 # load_adata
 ######################################################################################################
-import .utils
+# Archived helper; legacy imports removed for syntax compatibility.
 # File I/O
 import subprocess
 import gc
 # bioinformatic operations
-import .informatics_module
+# import .informatics_module
 # User interface
 from tqdm import tqdm
@@ -513,4 +513,4 @@ def modkit_extract_to_adata(fasta, bam, mapping_threshold, experiment_name, mods
             print(f"Deleted file: {hdf}")
         except OSError as e:
             print(f"Error deleting file {hdf}: {e}")
-######################################################################################################
+######################################################################################################

smftools/informatics/archived/helpers/archived/plot_bed_histograms.py CHANGED Viewed

@@ -86,6 +86,7 @@ def plot_bed_histograms(
     # Clip helper for hist tails
     def _clip_series(s, q=(0.0, 0.995)):
+        """Clip a Series to quantile bounds for plotting."""
         if q is None:
             return s.to_numpy()
         lo = s.quantile(q[0]) if q[0] is not None else s.min()
@@ -109,6 +110,7 @@ def plot_bed_histograms(
     # Pagination
     def _sanitize(name: str) -> str:
+        """Sanitize a string for use in filenames."""
         return "".join(ch if ch.isalnum() or ch in "-._" else "_" for ch in name)
     cols_per_fig = 4 if include_mapq_quality else 2
@@ -247,4 +249,4 @@ def plot_bed_histograms(
     #     plt.grid(True)
     #     save_name = os.path.join(plotting_directory, f'{bed_basename}_{chrom}_coverage_histogram.png')
     #     plt.savefig(save_name)
-    #     plt.close()
+    #     plt.close()

smftools/informatics/archived/print_bam_query_seq.py CHANGED Viewed

@@ -2,6 +2,12 @@ import pysam
 import sys
 def extract_reads(bam_file_path, num_reads=10):
+    """Print sequences for the first N reads in a BAM file.
+    Args:
+        bam_file_path: Path to BAM file.
+        num_reads: Number of reads to print.
+    """
     # Open the BAM file
     bam_file = pysam.AlignmentFile(bam_file_path, "rb")
@@ -26,4 +32,4 @@ if __name__ == "__main__":
     bam_file_path = sys.argv[1]
     # Call the function to extract the first 10 reads
-    extract_reads(bam_file_path)
+    extract_reads(bam_file_path)

smftools 0.2.3__py3-none-any.whl → 0.2.5__py3-none-any.whl

smftools 0.2.3__py3-none-any.whl → 0.2.5__py3-none-any.whl