smftools 0.2.1__py3-none-any.whl → 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. smftools/__init__.py +2 -6
  2. smftools/_version.py +1 -1
  3. smftools/cli/__init__.py +0 -0
  4. smftools/cli/archived/cli_flows.py +94 -0
  5. smftools/cli/helpers.py +48 -0
  6. smftools/cli/hmm_adata.py +361 -0
  7. smftools/cli/load_adata.py +637 -0
  8. smftools/cli/preprocess_adata.py +455 -0
  9. smftools/cli/spatial_adata.py +697 -0
  10. smftools/cli_entry.py +434 -0
  11. smftools/config/conversion.yaml +18 -6
  12. smftools/config/deaminase.yaml +18 -11
  13. smftools/config/default.yaml +151 -36
  14. smftools/config/direct.yaml +28 -1
  15. smftools/config/discover_input_files.py +115 -0
  16. smftools/config/experiment_config.py +225 -27
  17. smftools/hmm/HMM.py +12 -1
  18. smftools/hmm/__init__.py +0 -6
  19. smftools/hmm/archived/call_hmm_peaks.py +106 -0
  20. smftools/hmm/call_hmm_peaks.py +318 -90
  21. smftools/informatics/__init__.py +13 -7
  22. smftools/informatics/archived/fast5_to_pod5.py +43 -0
  23. smftools/informatics/archived/helpers/archived/__init__.py +71 -0
  24. smftools/informatics/archived/helpers/archived/align_and_sort_BAM.py +126 -0
  25. smftools/informatics/{helpers → archived/helpers/archived}/aligned_BAM_to_bed.py +6 -4
  26. smftools/informatics/archived/helpers/archived/bam_qc.py +213 -0
  27. smftools/informatics/archived/helpers/archived/bed_to_bigwig.py +90 -0
  28. smftools/informatics/archived/helpers/archived/concatenate_fastqs_to_bam.py +259 -0
  29. smftools/informatics/{helpers → archived/helpers/archived}/count_aligned_reads.py +2 -2
  30. smftools/informatics/{helpers → archived/helpers/archived}/demux_and_index_BAM.py +8 -10
  31. smftools/informatics/{helpers → archived/helpers/archived}/extract_base_identities.py +1 -1
  32. smftools/informatics/{helpers → archived/helpers/archived}/extract_mods.py +15 -13
  33. smftools/informatics/{helpers → archived/helpers/archived}/generate_converted_FASTA.py +2 -0
  34. smftools/informatics/{helpers → archived/helpers/archived}/get_chromosome_lengths.py +9 -8
  35. smftools/informatics/archived/helpers/archived/index_fasta.py +24 -0
  36. smftools/informatics/{helpers → archived/helpers/archived}/make_modbed.py +1 -2
  37. smftools/informatics/{helpers → archived/helpers/archived}/modQC.py +2 -2
  38. smftools/informatics/{helpers → archived/helpers/archived}/plot_bed_histograms.py +0 -19
  39. smftools/informatics/{helpers → archived/helpers/archived}/separate_bam_by_bc.py +6 -5
  40. smftools/informatics/{helpers → archived/helpers/archived}/split_and_index_BAM.py +7 -7
  41. smftools/informatics/archived/subsample_fasta_from_bed.py +49 -0
  42. smftools/informatics/bam_functions.py +811 -0
  43. smftools/informatics/basecalling.py +67 -0
  44. smftools/informatics/bed_functions.py +366 -0
  45. smftools/informatics/{helpers/converted_BAM_to_adata_II.py → converted_BAM_to_adata.py} +42 -30
  46. smftools/informatics/fasta_functions.py +255 -0
  47. smftools/informatics/h5ad_functions.py +197 -0
  48. smftools/informatics/{helpers/modkit_extract_to_adata.py → modkit_extract_to_adata.py} +142 -59
  49. smftools/informatics/modkit_functions.py +129 -0
  50. smftools/informatics/ohe.py +160 -0
  51. smftools/informatics/pod5_functions.py +224 -0
  52. smftools/informatics/{helpers/run_multiqc.py → run_multiqc.py} +5 -2
  53. smftools/plotting/autocorrelation_plotting.py +1 -3
  54. smftools/plotting/general_plotting.py +1084 -363
  55. smftools/plotting/position_stats.py +3 -3
  56. smftools/preprocessing/__init__.py +4 -4
  57. smftools/preprocessing/append_base_context.py +35 -26
  58. smftools/preprocessing/append_binary_layer_by_base_context.py +6 -6
  59. smftools/preprocessing/binarize.py +17 -0
  60. smftools/preprocessing/binarize_on_Youden.py +11 -9
  61. smftools/preprocessing/calculate_complexity_II.py +1 -1
  62. smftools/preprocessing/calculate_coverage.py +16 -13
  63. smftools/preprocessing/calculate_position_Youden.py +42 -26
  64. smftools/preprocessing/calculate_read_modification_stats.py +2 -2
  65. smftools/preprocessing/filter_reads_on_length_quality_mapping.py +1 -1
  66. smftools/preprocessing/filter_reads_on_modification_thresholds.py +20 -20
  67. smftools/preprocessing/flag_duplicate_reads.py +2 -2
  68. smftools/preprocessing/invert_adata.py +1 -1
  69. smftools/preprocessing/load_sample_sheet.py +1 -1
  70. smftools/preprocessing/reindex_references_adata.py +37 -0
  71. smftools/readwrite.py +360 -140
  72. {smftools-0.2.1.dist-info → smftools-0.2.4.dist-info}/METADATA +26 -19
  73. smftools-0.2.4.dist-info/RECORD +176 -0
  74. smftools-0.2.4.dist-info/entry_points.txt +2 -0
  75. smftools/cli.py +0 -184
  76. smftools/informatics/fast5_to_pod5.py +0 -24
  77. smftools/informatics/helpers/__init__.py +0 -73
  78. smftools/informatics/helpers/align_and_sort_BAM.py +0 -86
  79. smftools/informatics/helpers/bam_qc.py +0 -66
  80. smftools/informatics/helpers/bed_to_bigwig.py +0 -39
  81. smftools/informatics/helpers/concatenate_fastqs_to_bam.py +0 -378
  82. smftools/informatics/helpers/discover_input_files.py +0 -100
  83. smftools/informatics/helpers/index_fasta.py +0 -12
  84. smftools/informatics/helpers/make_dirs.py +0 -21
  85. smftools/informatics/readwrite.py +0 -106
  86. smftools/informatics/subsample_fasta_from_bed.py +0 -47
  87. smftools/load_adata.py +0 -1346
  88. smftools-0.2.1.dist-info/RECORD +0 -161
  89. smftools-0.2.1.dist-info/entry_points.txt +0 -2
  90. /smftools/hmm/{apply_hmm_batched.py → archived/apply_hmm_batched.py} +0 -0
  91. /smftools/hmm/{calculate_distances.py → archived/calculate_distances.py} +0 -0
  92. /smftools/hmm/{train_hmm.py → archived/train_hmm.py} +0 -0
  93. /smftools/informatics/{basecall_pod5s.py → archived/basecall_pod5s.py} +0 -0
  94. /smftools/informatics/{helpers → archived/helpers/archived}/canoncall.py +0 -0
  95. /smftools/informatics/{helpers → archived/helpers/archived}/converted_BAM_to_adata.py +0 -0
  96. /smftools/informatics/{helpers → archived/helpers/archived}/extract_read_features_from_bam.py +0 -0
  97. /smftools/informatics/{helpers → archived/helpers/archived}/extract_read_lengths_from_bed.py +0 -0
  98. /smftools/informatics/{helpers → archived/helpers/archived}/extract_readnames_from_BAM.py +0 -0
  99. /smftools/informatics/{helpers → archived/helpers/archived}/find_conversion_sites.py +0 -0
  100. /smftools/informatics/{helpers → archived/helpers/archived}/get_native_references.py +0 -0
  101. /smftools/informatics/{helpers → archived/helpers}/archived/informatics.py +0 -0
  102. /smftools/informatics/{helpers → archived/helpers}/archived/load_adata.py +0 -0
  103. /smftools/informatics/{helpers → archived/helpers/archived}/modcall.py +0 -0
  104. /smftools/informatics/{helpers → archived/helpers/archived}/ohe_batching.py +0 -0
  105. /smftools/informatics/{helpers → archived/helpers/archived}/ohe_layers_decode.py +0 -0
  106. /smftools/informatics/{helpers → archived/helpers/archived}/one_hot_decode.py +0 -0
  107. /smftools/informatics/{helpers → archived/helpers/archived}/one_hot_encode.py +0 -0
  108. /smftools/informatics/{subsample_pod5.py → archived/subsample_pod5.py} +0 -0
  109. /smftools/informatics/{helpers/binarize_converted_base_identities.py → binarize_converted_base_identities.py} +0 -0
  110. /smftools/informatics/{helpers/complement_base_list.py → complement_base_list.py} +0 -0
  111. /smftools/preprocessing/{add_read_length_and_mapping_qc.py → archives/add_read_length_and_mapping_qc.py} +0 -0
  112. /smftools/preprocessing/{calculate_complexity.py → archives/calculate_complexity.py} +0 -0
  113. {smftools-0.2.1.dist-info → smftools-0.2.4.dist-info}/WHEEL +0 -0
  114. {smftools-0.2.1.dist-info → smftools-0.2.4.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,126 @@
1
+ from pathlib import Path
2
+ import os
3
+ import subprocess
4
+ from typing import List, Optional, Union
5
+ import pysam
6
+
7
def _bam_to_fastq_with_pysam(bam_path: Union[str, Path], fastq_path: Union[str, Path]) -> None:
    """
    Minimal BAM->FASTQ using pysam.

    Every record yielded by ``fetch(until_eof=True)`` is written as one
    four-line FASTQ record using its stored name, sequence and quality string.
    Records are not filtered (secondary/supplementary records are kept).

    Parameters:
        bam_path (str | Path): BAM file to read (opened with ``check_sq=False``).
        fastq_path (str | Path): FASTQ file to write; overwritten if it exists.

    Returns:
        None
    """
    bam_path = str(bam_path)
    fastq_path = str(fastq_path)
    with pysam.AlignmentFile(bam_path, "rb", check_sq=False) as bam, open(fastq_path, "w") as fq:
        for r in bam.fetch(until_eof=True):
            # Skip secondary/supplementary if you want (optional):
            # if r.is_secondary or r.is_supplementary: continue
            name = r.query_name
            seq = r.query_sequence or ""
            qual = r.qual or ""
            if len(qual) != len(seq):
                # FASTQ needs exactly one quality character per base. When the
                # record carries no usable qualities, emit the lowest Phred+33
                # score for every base instead of a malformed record.
                qual = "!" * len(seq)
            fq.write(f"@{name}\n{seq}\n+\n{qual}\n")
21
+
22
def _sort_bam_with_pysam(in_bam: Union[str, Path], out_bam: Union[str, Path], threads: Optional[int] = None) -> None:
    """
    Sort ``in_bam`` into ``out_bam`` through ``pysam.sort``.

    Arguments are passed samtools-style: an optional ``-@ <threads>`` pair
    (only when ``threads`` is truthy), then ``-o <out_bam> <in_bam>``.
    """
    thread_flags = ["-@", str(threads)] if threads else []
    pysam.sort(*thread_flags, "-o", str(out_bam), str(in_bam))
29
+
30
def _index_bam_with_pysam(bam_path: Union[str, Path], threads: Optional[int] = None) -> None:
    """
    Index ``bam_path`` through ``pysam.index``.

    ``-@ <threads>`` is placed before the path only when ``threads`` is truthy.
    """
    target = str(bam_path)
    if not threads:
        pysam.index(target)
        return
    # pysam.index takes samtools-style positional arguments
    pysam.index("-@", str(threads), target)
37
+
38
def align_and_sort_BAM(fasta,
                       input,
                       bam_suffix='.bam',
                       output_directory='aligned_outputs',
                       make_bigwigs=False,
                       threads=None,
                       aligner='minimap2',
                       aligner_args=('-a', '-x', 'map-ont', '--MD', '-Y', '-y', '-N', '5', '--secondary=no')):
    """
    Align basecalled reads to a reference, then sort and index the result.

    Parameters:
        fasta (str | Path): File path to the reference genome to align to.
        input (str | Path): File path to the basecalled file to align. A BAM is
            converted to a temporary FASTQ for minimap2; a FASTQ
            (.fastq/.fq, optionally .gz) is handed to minimap2 directly.
        bam_suffix (str): The suffix to use for the output files.
        output_directory (str | Path): Directory to write the outputs to.
            Created if it does not exist.
        make_bigwigs (bool): Accepted for signature compatibility; not used by
            this function.
        threads (int): Number of threads passed to the aligner, sort and index.
        aligner (str): Aligner to use: 'minimap2' or 'dorado'.
        aligner_args (sequence of str): Extra command-line arguments placed
            before the reference/reads arguments of the aligner.

    Returns:
        None
        The function writes: 1) the aligner output
        (<stem>_aligned<bam_suffix>), 2) the coordinate-sorted file
        (<stem>_aligned_sorted<bam_suffix>), 3) an index for the sorted file.
        If the aligner name is not recognized, a message is printed and
        nothing is written.

    Raises:
        subprocess.CalledProcessError: If the aligner exits with a non-zero status.
    """
    # Accept plain strings as well as Path objects (the default
    # output_directory is a str, which cannot be used with the "/" operator).
    input = Path(input)
    output_directory = Path(output_directory)
    output_directory.mkdir(parents=True, exist_ok=True)

    # Copy so the caller's sequence (or the shared default) is never modified.
    aligner_args = list(aligner_args or ())

    input_as_fastq = input.with_name(input.stem + '.fastq')

    output_path_minus_suffix = output_directory / input.stem

    # NOTE: output naming is intentionally unchanged. Because .stem and
    # .with_suffix() are applied to a name whose extension was already removed,
    # input names containing extra dots lose their last dotted component(s).
    aligned_BAM = output_path_minus_suffix.with_name(output_path_minus_suffix.stem + "_aligned")
    aligned_output = aligned_BAM.with_suffix(bam_suffix)
    aligned_sorted_BAM = aligned_BAM.with_name(aligned_BAM.stem + "_sorted")
    aligned_sorted_output = aligned_sorted_BAM.with_suffix(bam_suffix)

    # Command lines need strings; a falsy value (None/0) means "no thread flag".
    if threads:
        threads = str(threads)

    if aligner == 'minimap2':
        input_is_fastq = input.name.lower().endswith(('.fastq', '.fq', '.fastq.gz', '.fq.gz'))
        temp_fastq = None
        try:
            if input_is_fastq:
                # Already FASTQ: align it as-is and never delete the user's file.
                reads = input
            else:
                print(f"Converting BAM to FASTQ: {input}")
                temp_fastq = input_as_fastq
                _bam_to_fastq_with_pysam(input, temp_fastq)
                reads = temp_fastq
            print(f"Aligning FASTQ to Reference: {reads}")
            minimap_command = ['minimap2'] + aligner_args
            if threads:
                minimap_command += ['-t', threads]
            minimap_command += [str(fasta), str(reads)]
            with open(aligned_output, "w") as out_handle:
                subprocess.run(minimap_command, stdout=out_handle, check=True)
        finally:
            # Remove the intermediate FASTQ even when conversion/alignment fails.
            if temp_fastq is not None and temp_fastq.exists():
                os.remove(temp_fastq)

    elif aligner == 'dorado':
        # Run dorado aligner
        print(f"Aligning BAM to Reference: {input}")
        alignment_command = ["dorado", "aligner"]
        if threads:
            alignment_command += ["-t", threads]
        alignment_command += aligner_args + [str(fasta), str(input)]
        with open(aligned_output, "wb") as out_handle:
            subprocess.run(alignment_command, stdout=out_handle, check=True)

    else:
        print(f'Aligner not recognized: {aligner}. Choose from minimap2 and dorado')
        return

    # --- Sort & Index with pysam ---
    print(f"[pysam] Sorting: {aligned_output} -> {aligned_sorted_output}")
    _sort_bam_with_pysam(aligned_output, aligned_sorted_output, threads=threads)

    print(f"[pysam] Indexing: {aligned_sorted_output}")
    _index_bam_with_pysam(aligned_sorted_output, threads=threads)
@@ -15,22 +15,23 @@ def aligned_BAM_to_bed(aligned_BAM, out_dir, fasta, make_bigwigs, threads=None):
15
15
  """
16
16
  import subprocess
17
17
  import os
18
+ from pathlib import Path
18
19
  import pysam
19
20
  import numpy as np
20
21
  import concurrent.futures
21
22
  from concurrent.futures import ProcessPoolExecutor
22
23
  from .bed_to_bigwig import bed_to_bigwig
23
- from . import make_dirs
24
+ from ...readwrite import make_dirs
24
25
  from .plot_bed_histograms import plot_bed_histograms
25
26
 
26
27
  threads = threads or os.cpu_count() # Use max available cores if not specified
27
28
 
28
29
  # Create necessary directories
29
- plotting_dir = os.path.join(out_dir, "bed_cov_histograms")
30
- bed_dir = os.path.join(out_dir, "beds")
30
+ plotting_dir = out_dir / "bed_cov_histograms"
31
+ bed_dir = out_dir / "beds"
31
32
  make_dirs([plotting_dir, bed_dir])
32
33
 
33
- bed_output = os.path.join(bed_dir, os.path.basename(aligned_BAM).replace(".bam", "_bed.bed"))
34
+ bed_output = bed_dir / str(aligned_BAM.name).replace(".bam", "_bed.bed")
34
35
 
35
36
  print(f"Creating BED-like file from BAM (with MAPQ and avg base quality): {aligned_BAM}")
36
37
 
@@ -64,6 +65,7 @@ def aligned_BAM_to_bed(aligned_BAM, out_dir, fasta, make_bigwigs, threads=None):
64
65
 
65
66
  def split_bed(bed):
66
67
  """Splits into aligned and unaligned reads (chrom == '*')."""
68
+ bed = str(bed)
67
69
  aligned = bed.replace(".bed", "_aligned.bed")
68
70
  unaligned = bed.replace(".bed", "_unaligned.bed")
69
71
  with open(bed, "r") as infile, open(aligned, "w") as aligned_out, open(unaligned, "w") as unaligned_out:
@@ -0,0 +1,213 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from pathlib import Path
5
+ from concurrent.futures import ThreadPoolExecutor, as_completed
6
+ from typing import Iterable, Optional, Tuple, List
7
+
8
def bam_qc(
    bam_files: Iterable[str | Path],
    bam_qc_dir: str | Path,
    threads: Optional[int],
    modality: str,
    stats: bool = True,
    flagstats: bool = True,
    idxstats: bool = True,
) -> None:
    """
    QC for BAM/CRAMs: stats, flagstat, idxstats.

    Each enabled report is written to ``bam_qc_dir`` as
    ``<stem>_stats.txt``, ``<stem>_flagstat.txt`` and ``<stem>_idxstats.txt``.
    Prefers pysam; falls back to the ``samtools`` executable if needed.
    Files are processed in parallel (up to ``threads`` workers, default serial);
    the reports for one file are produced one after another.

    Parameters:
        bam_files: BAM/CRAM paths to process.
        bam_qc_dir: Output directory; created (with parents) if missing.
        threads: Maximum number of files processed concurrently. Falsy or
            non-positive values mean one worker.
        modality: Only checked against {"conversion", "direct"} to emit a
            warning; it does not change processing.
        stats: Write the ``stats`` report.
        flagstats: Write the ``flagstat`` report.
        idxstats: Write the ``idxstats`` report.

    Returns:
        None. A failure for one file is printed as an ``[error]`` line that
        names the file; the remaining files are still processed.
    """
    import subprocess
    import shutil

    # Try to import pysam once. Any import-time failure selects the samtools
    # fallback (kept deliberately broad, as a best-effort probe).
    try:
        import pysam
        HAVE_PYSAM = True
    except Exception:
        HAVE_PYSAM = False

    # Looked up once instead of once per file.
    have_samtools = shutil.which("samtools") is not None

    bam_qc_dir = Path(bam_qc_dir)
    bam_qc_dir.mkdir(parents=True, exist_ok=True)

    bam_files = [Path(b) for b in bam_files]

    def _has_index(p: Path) -> bool:
        """Return True when an index file for ``p`` is found next to it."""
        suffix = p.suffix.lower()
        if suffix == ".bam":
            # Check both naming styles: <name>.bam.bai and <name>.bai
            # (plus <name>.bam.csi).
            candidates = (
                Path(str(p) + ".bai"),
                p.with_suffix(".bai"),
                Path(str(p) + ".csi"),
            )
        elif suffix == ".cram":
            candidates = (Path(str(p) + ".crai"), p.with_suffix(".crai"))
        else:
            return False
        return any(c.exists() for c in candidates)

    def _ensure_index(p: Path) -> None:
        """Create an index for ``p`` unless one is already present."""
        if _has_index(p):
            return
        if HAVE_PYSAM:
            pysam.index(str(p))
        else:
            cmd = ["samtools", "index", str(p)]
            subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    def _run_task(bam: Path, task: str, out_path: Path, results: List[Tuple[str, int]]) -> None:
        """Run one samtools-style ``task`` on ``bam`` and write its text to ``out_path``."""
        # HAVE_PYSAM is tested first so ``pysam`` is never touched when unbound.
        runner = getattr(pysam, task, None) if HAVE_PYSAM else None
        if runner is not None:
            out_path.write_text(runner(str(bam)))
            results.append((f"{task}(pysam)", 0))
            return
        cmd = ["samtools", task, str(bam)]
        with open(out_path, "w") as fh:
            cp = subprocess.run(cmd, stdout=fh, stderr=subprocess.PIPE)
        results.append((f"{task}(samtools)", cp.returncode))
        if cp.returncode != 0:
            raise RuntimeError(cp.stderr.decode(errors="replace"))

    def _run_one(bam: Path) -> Tuple[Path, List[Tuple[str, int]]]:
        """Produce every enabled report for ``bam``; return (file, [(task_name, returncode)])."""
        # Fail early, before attempting an index that could not succeed anyway.
        if not HAVE_PYSAM and not have_samtools:
            raise RuntimeError("Neither pysam nor samtools is available in PATH.")

        results: List[Tuple[str, int]] = []
        base = bam.stem  # filename without its last suffix
        planned = (
            (stats, "stats", bam_qc_dir / f"{base}_stats.txt"),
            (flagstats, "flagstat", bam_qc_dir / f"{base}_flagstat.txt"),
            (idxstats, "idxstats", bam_qc_dir / f"{base}_idxstats.txt"),
        )

        # Indexing is best-effort: a failure is reported and the reports are
        # still attempted.
        try:
            _ensure_index(bam)
        except Exception as e:
            print(f"[warn] Indexing failed for {bam}: {e}")

        # Serial per file; the first failing task aborts the rest for this file.
        for enabled, task, out_path in planned:
            if enabled:
                _run_task(bam, task, out_path, results)
        return bam, results

    # Parallel across BAMs.
    # NOTE(review): confirm that pysam's samtools command wrappers are safe to
    # call from several threads at once before relying on threads > 1.
    max_workers = int(threads) if threads and int(threads) > 0 else 1
    with ThreadPoolExecutor(max_workers=max_workers) as ex:
        # Remember which file each future belongs to so failures can name it.
        futures = {ex.submit(_run_one, b): b for b in bam_files}

        for fut in as_completed(futures):
            source = futures[fut]
            try:
                bam, res = fut.result()
            except Exception as e:
                print(f"[error] QC failed for {source}: {e}")
            else:
                summary = ", ".join(f"{name}:{rc}" for name, rc in res) or "no-op"
                print(f"[qc] {bam.name}: {summary}")

    # Placeholders to keep the signature stable
    if modality not in {"conversion", "direct"}:
        print(f"[warn] Unknown modality '{modality}', continuing.")

    print("QC processing completed.")
149
+
150
+ # def bam_qc(bam_files, bam_qc_dir, threads, modality, stats=True, flagstats=True, idxstats=True):
151
+ # """
152
+ # Performs QC on BAM files by running samtools stats, flagstat, and idxstats.
153
+
154
+ # Parameters:
155
+ # - bam_files: List of BAM file paths.
156
+ # - bam_qc_dir: Directory to save QC reports.
157
+ # - threads: Number threads to use.
158
+ # - modality: 'conversion' or 'direct' (affects processing mode).
159
+ # - stats: Run `samtools stats` if True.
160
+ # - flagstats: Run `samtools flagstat` if True.
161
+ # - idxstats: Run `samtools idxstats` if True.
162
+ # """
163
+ # import os
164
+ # import subprocess
165
+
166
+ # # Ensure the QC output directory exists
167
+ # os.makedirs(bam_qc_dir, exist_ok=True)
168
+
169
+ # if threads:
170
+ # threads = str(threads)
171
+ # else:
172
+ # pass
173
+
174
+ # for bam in bam_files:
175
+ # bam_name = os.path.basename(bam).replace(".bam", "") # Extract filename without extension
176
+
177
+ # # Run samtools QC commands based on selected options
178
+ # if stats:
179
+ # stats_out = os.path.join(bam_qc_dir, f"{bam_name}_stats.txt")
180
+ # if threads:
181
+ # command = ["samtools", "stats", "-@", threads, bam]
182
+ # else:
183
+ # command = ["samtools", "stats", bam]
184
+ # print(f"Running: {' '.join(command)} > {stats_out}")
185
+ # with open(stats_out, "w") as out_file:
186
+ # subprocess.run(command, stdout=out_file)
187
+
188
+ # if flagstats:
189
+ # flagstats_out = os.path.join(bam_qc_dir, f"{bam_name}_flagstat.txt")
190
+ # if threads:
191
+ # command = ["samtools", "flagstat", "-@", threads, bam]
192
+ # else:
193
+ # command = ["samtools", "flagstat", bam]
194
+ # print(f"Running: {' '.join(command)} > {flagstats_out}")
195
+ # with open(flagstats_out, "w") as out_file:
196
+ # subprocess.run(command, stdout=out_file)
197
+
198
+ # if idxstats:
199
+ # idxstats_out = os.path.join(bam_qc_dir, f"{bam_name}_idxstats.txt")
200
+ # if threads:
201
+ # command = ["samtools", "idxstats", "-@", threads, bam]
202
+ # else:
203
+ # command = ["samtools", "idxstats", bam]
204
+ # print(f"Running: {' '.join(command)} > {idxstats_out}")
205
+ # with open(idxstats_out, "w") as out_file:
206
+ # subprocess.run(command, stdout=out_file)
207
+
208
+ # if modality == 'conversion':
209
+ # pass
210
+ # elif modality == 'direct':
211
+ # pass
212
+
213
+ # print("QC processing completed.")
@@ -0,0 +1,90 @@
1
+ from pathlib import Path
2
+ import pybedtools
3
+ import pyBigWig
4
+
5
def bed_to_bigwig(fasta: str, bed: str) -> str:
    """
    BED → bedGraph → bigWig

    Writes ``<bed stem>.bedgraph`` and ``<bed stem>.bw`` next to the BED file.

    Requires:
    - A ``.fai`` index for the FASTA. It is looked up, in order, as
      ``<bed dir>/<fasta stem>.fai``, ``<fasta path>.fai`` and
      ``<fasta path with its suffix replaced by .fai>``.

    Parameters:
        fasta (str | Path): Path to the reference FASTA.
        bed (str | Path): Path to the input BED file.

    Returns:
        str: Path of the bigWig file that was written.

    Raises:
        FileNotFoundError: If no ``.fai`` index is found in any of the
            locations listed above.
    """

    bed = Path(bed)
    fa = Path(fasta)  # path to .fa
    parent = bed.parent
    stem = bed.stem

    # Keep the original location first; fall back to the index next to the FASTA.
    fai_candidates = [
        parent / f"{fa.stem}.fai",
        Path(str(fa) + ".fai"),
        fa.with_suffix(".fai"),
    ]
    fai = next((candidate for candidate in fai_candidates if candidate.exists()), None)
    if fai is None:
        searched = ", ".join(str(candidate) for candidate in fai_candidates)
        raise FileNotFoundError(f"No FASTA index (.fai) found for {fa}; looked in: {searched}")

    bedgraph = parent / f"{stem}.bedgraph"
    bigwig = parent / f"{stem}.bw"

    # 1) Compute coverage → bedGraph
    print(f"[pybedtools] generating coverage bedgraph from {bed}")
    bt = pybedtools.BedTool(str(bed))
    # bedtools genomecov -bg
    coverage = bt.genome_coverage(bg=True, genome=str(fai))
    coverage.saveas(str(bedgraph))

    # 2) Convert bedGraph → BigWig via pyBigWig
    print(f"[pyBigWig] converting bedgraph → bigwig: {bigwig}")

    # read chrom sizes from the FASTA .fai index (columns: name, length, ...)
    chrom_sizes = {}
    with open(fai) as f:
        for line in f:
            fields = line.strip().split("\t")
            if len(fields) < 2:
                continue  # blank or malformed line
            chrom_sizes[fields[0]] = int(fields[1])

    # Entries are handed to pyBigWig as parallel lists, in bounded batches so
    # a large bedGraph is never held in memory all at once.
    batch_size = 100_000

    bw = pyBigWig.open(str(bigwig), "w")
    try:
        bw.addHeader(list(chrom_sizes.items()))

        chroms, starts, ends, values = [], [], [], []
        # NOTE(review): pyBigWig expects entries in sorted order; presumably the
        # bedGraph produced above already satisfies this — confirm for
        # unsorted BED input.
        with open(bedgraph) as f:
            for line in f:
                fields = line.split()
                if not fields:
                    continue
                chrom, start, end, depth = fields
                chroms.append(chrom)
                starts.append(int(start))
                ends.append(int(end))
                values.append(float(depth))
                if len(chroms) >= batch_size:
                    bw.addEntries(chroms, starts, ends=ends, values=values)
                    chroms, starts, ends, values = [], [], [], []

        if chroms:
            bw.addEntries(chroms, starts, ends=ends, values=values)
    finally:
        # Always release the handle, even when writing fails part-way.
        bw.close()

    print(f"BigWig written: {bigwig}")
    return str(bigwig)
53
+
54
+ # def bed_to_bigwig(fasta, bed):
55
+ # """
56
+ # Takes a bed file of reads and makes a bedgraph plus a bigwig
57
+
58
+ # Parameters:
59
+ # fasta (str): File path to the reference genome to align to.
60
+ # bed (str): File path to the input bed.
61
+ # Returns:
62
+ # None
63
+ # """
64
+ # import os
65
+ # import subprocess
66
+
67
+ # bed_basename = os.path.basename(bed)
68
+ # parent_dir = os.path.dirname(bed)
69
+ # bed_basename_minus_suffix = bed_basename.split('.bed')[0]
70
+ # fasta_basename = os.path.basename(fasta)
71
+ # fasta_dir = os.path.dirname(fasta)
72
+ # fasta_basename_minus_suffix = fasta_basename.split('.fa')[0]
73
+ # chrom_basename = fasta_basename_minus_suffix + '.chrom.sizes'
74
+ # chrom_path = os.path.join(fasta_dir, chrom_basename)
75
+ # bedgraph_basename = bed_basename_minus_suffix + '_bedgraph.bedgraph'
76
+ # bedgraph_output = os.path.join(parent_dir, bedgraph_basename)
77
+ # bigwig_basename = bed_basename_minus_suffix + '_bigwig.bw'
78
+ # bigwig_output = os.path.join(parent_dir, bigwig_basename)
79
+
80
+ # # Make the bedgraph
81
+ # with open(bedgraph_output, 'w') as outfile:
82
+ # # Command as a list
83
+ # command = ["bedtools", "genomecov", "-i", bed, "-g", chrom_path, "-bg"]
84
+ # print(f'Making bedgraph from {bed_basename}')
85
+ # subprocess.run(command, stdout=outfile)
86
+
87
+ # # Make the bigwig
88
+ # command = ["bedGraphToBigWig", bedgraph_output, chrom_path, bigwig_output]
89
+ # print(f'Making bigwig from {bedgraph_basename}')
90
+ # subprocess.run(command)