smftools 0.2.5__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (164) hide show
  1. smftools/__init__.py +39 -7
  2. smftools/_settings.py +2 -0
  3. smftools/_version.py +3 -1
  4. smftools/cli/__init__.py +1 -0
  5. smftools/cli/archived/cli_flows.py +2 -0
  6. smftools/cli/helpers.py +34 -6
  7. smftools/cli/hmm_adata.py +239 -33
  8. smftools/cli/latent_adata.py +318 -0
  9. smftools/cli/load_adata.py +167 -131
  10. smftools/cli/preprocess_adata.py +180 -53
  11. smftools/cli/spatial_adata.py +152 -100
  12. smftools/cli_entry.py +38 -1
  13. smftools/config/__init__.py +2 -0
  14. smftools/config/conversion.yaml +11 -1
  15. smftools/config/default.yaml +42 -2
  16. smftools/config/experiment_config.py +59 -1
  17. smftools/constants.py +65 -0
  18. smftools/datasets/__init__.py +2 -0
  19. smftools/hmm/HMM.py +97 -3
  20. smftools/hmm/__init__.py +24 -13
  21. smftools/hmm/archived/apply_hmm_batched.py +2 -0
  22. smftools/hmm/archived/calculate_distances.py +2 -0
  23. smftools/hmm/archived/call_hmm_peaks.py +2 -0
  24. smftools/hmm/archived/train_hmm.py +2 -0
  25. smftools/hmm/call_hmm_peaks.py +5 -2
  26. smftools/hmm/display_hmm.py +4 -1
  27. smftools/hmm/hmm_readwrite.py +7 -2
  28. smftools/hmm/nucleosome_hmm_refinement.py +2 -0
  29. smftools/informatics/__init__.py +59 -34
  30. smftools/informatics/archived/bam_conversion.py +2 -0
  31. smftools/informatics/archived/bam_direct.py +2 -0
  32. smftools/informatics/archived/basecall_pod5s.py +2 -0
  33. smftools/informatics/archived/basecalls_to_adata.py +2 -0
  34. smftools/informatics/archived/conversion_smf.py +2 -0
  35. smftools/informatics/archived/deaminase_smf.py +1 -0
  36. smftools/informatics/archived/direct_smf.py +2 -0
  37. smftools/informatics/archived/fast5_to_pod5.py +2 -0
  38. smftools/informatics/archived/helpers/archived/__init__.py +2 -0
  39. smftools/informatics/archived/helpers/archived/align_and_sort_BAM.py +2 -0
  40. smftools/informatics/archived/helpers/archived/aligned_BAM_to_bed.py +2 -0
  41. smftools/informatics/archived/helpers/archived/bed_to_bigwig.py +2 -0
  42. smftools/informatics/archived/helpers/archived/canoncall.py +2 -0
  43. smftools/informatics/archived/helpers/archived/converted_BAM_to_adata.py +2 -0
  44. smftools/informatics/archived/helpers/archived/count_aligned_reads.py +2 -0
  45. smftools/informatics/archived/helpers/archived/demux_and_index_BAM.py +2 -0
  46. smftools/informatics/archived/helpers/archived/extract_base_identities.py +2 -0
  47. smftools/informatics/archived/helpers/archived/extract_mods.py +2 -0
  48. smftools/informatics/archived/helpers/archived/extract_read_features_from_bam.py +2 -0
  49. smftools/informatics/archived/helpers/archived/extract_read_lengths_from_bed.py +2 -0
  50. smftools/informatics/archived/helpers/archived/extract_readnames_from_BAM.py +2 -0
  51. smftools/informatics/archived/helpers/archived/find_conversion_sites.py +2 -0
  52. smftools/informatics/archived/helpers/archived/generate_converted_FASTA.py +2 -0
  53. smftools/informatics/archived/helpers/archived/get_chromosome_lengths.py +2 -0
  54. smftools/informatics/archived/helpers/archived/get_native_references.py +2 -0
  55. smftools/informatics/archived/helpers/archived/index_fasta.py +2 -0
  56. smftools/informatics/archived/helpers/archived/informatics.py +2 -0
  57. smftools/informatics/archived/helpers/archived/load_adata.py +2 -0
  58. smftools/informatics/archived/helpers/archived/make_modbed.py +2 -0
  59. smftools/informatics/archived/helpers/archived/modQC.py +2 -0
  60. smftools/informatics/archived/helpers/archived/modcall.py +2 -0
  61. smftools/informatics/archived/helpers/archived/ohe_batching.py +2 -0
  62. smftools/informatics/archived/helpers/archived/ohe_layers_decode.py +2 -0
  63. smftools/informatics/archived/helpers/archived/one_hot_decode.py +2 -0
  64. smftools/informatics/archived/helpers/archived/one_hot_encode.py +2 -0
  65. smftools/informatics/archived/helpers/archived/plot_bed_histograms.py +2 -0
  66. smftools/informatics/archived/helpers/archived/separate_bam_by_bc.py +2 -0
  67. smftools/informatics/archived/helpers/archived/split_and_index_BAM.py +2 -0
  68. smftools/informatics/archived/print_bam_query_seq.py +2 -0
  69. smftools/informatics/archived/subsample_fasta_from_bed.py +2 -0
  70. smftools/informatics/archived/subsample_pod5.py +2 -0
  71. smftools/informatics/bam_functions.py +1093 -176
  72. smftools/informatics/basecalling.py +2 -0
  73. smftools/informatics/bed_functions.py +271 -61
  74. smftools/informatics/binarize_converted_base_identities.py +3 -0
  75. smftools/informatics/complement_base_list.py +2 -0
  76. smftools/informatics/converted_BAM_to_adata.py +641 -176
  77. smftools/informatics/fasta_functions.py +94 -10
  78. smftools/informatics/h5ad_functions.py +123 -4
  79. smftools/informatics/modkit_extract_to_adata.py +1019 -431
  80. smftools/informatics/modkit_functions.py +2 -0
  81. smftools/informatics/ohe.py +2 -0
  82. smftools/informatics/pod5_functions.py +3 -2
  83. smftools/informatics/sequence_encoding.py +72 -0
  84. smftools/logging_utils.py +21 -2
  85. smftools/machine_learning/__init__.py +22 -6
  86. smftools/machine_learning/data/__init__.py +2 -0
  87. smftools/machine_learning/data/anndata_data_module.py +18 -4
  88. smftools/machine_learning/data/preprocessing.py +2 -0
  89. smftools/machine_learning/evaluation/__init__.py +2 -0
  90. smftools/machine_learning/evaluation/eval_utils.py +2 -0
  91. smftools/machine_learning/evaluation/evaluators.py +14 -9
  92. smftools/machine_learning/inference/__init__.py +2 -0
  93. smftools/machine_learning/inference/inference_utils.py +2 -0
  94. smftools/machine_learning/inference/lightning_inference.py +6 -1
  95. smftools/machine_learning/inference/sklearn_inference.py +2 -0
  96. smftools/machine_learning/inference/sliding_window_inference.py +2 -0
  97. smftools/machine_learning/models/__init__.py +2 -0
  98. smftools/machine_learning/models/base.py +7 -2
  99. smftools/machine_learning/models/cnn.py +7 -2
  100. smftools/machine_learning/models/lightning_base.py +16 -11
  101. smftools/machine_learning/models/mlp.py +5 -1
  102. smftools/machine_learning/models/positional.py +7 -2
  103. smftools/machine_learning/models/rnn.py +5 -1
  104. smftools/machine_learning/models/sklearn_models.py +14 -9
  105. smftools/machine_learning/models/transformer.py +7 -2
  106. smftools/machine_learning/models/wrappers.py +6 -2
  107. smftools/machine_learning/training/__init__.py +2 -0
  108. smftools/machine_learning/training/train_lightning_model.py +13 -3
  109. smftools/machine_learning/training/train_sklearn_model.py +2 -0
  110. smftools/machine_learning/utils/__init__.py +2 -0
  111. smftools/machine_learning/utils/device.py +5 -1
  112. smftools/machine_learning/utils/grl.py +5 -1
  113. smftools/metadata.py +1 -1
  114. smftools/optional_imports.py +31 -0
  115. smftools/plotting/__init__.py +41 -31
  116. smftools/plotting/autocorrelation_plotting.py +9 -5
  117. smftools/plotting/classifiers.py +16 -4
  118. smftools/plotting/general_plotting.py +2415 -629
  119. smftools/plotting/hmm_plotting.py +97 -9
  120. smftools/plotting/position_stats.py +15 -7
  121. smftools/plotting/qc_plotting.py +6 -1
  122. smftools/preprocessing/__init__.py +36 -37
  123. smftools/preprocessing/append_base_context.py +17 -17
  124. smftools/preprocessing/append_mismatch_frequency_sites.py +158 -0
  125. smftools/preprocessing/archived/add_read_length_and_mapping_qc.py +2 -0
  126. smftools/preprocessing/archived/calculate_complexity.py +2 -0
  127. smftools/preprocessing/archived/mark_duplicates.py +2 -0
  128. smftools/preprocessing/archived/preprocessing.py +2 -0
  129. smftools/preprocessing/archived/remove_duplicates.py +2 -0
  130. smftools/preprocessing/binary_layers_to_ohe.py +2 -1
  131. smftools/preprocessing/calculate_complexity_II.py +4 -1
  132. smftools/preprocessing/calculate_consensus.py +1 -1
  133. smftools/preprocessing/calculate_pairwise_differences.py +2 -0
  134. smftools/preprocessing/calculate_pairwise_hamming_distances.py +3 -0
  135. smftools/preprocessing/calculate_position_Youden.py +9 -2
  136. smftools/preprocessing/calculate_read_modification_stats.py +6 -1
  137. smftools/preprocessing/filter_reads_on_length_quality_mapping.py +2 -0
  138. smftools/preprocessing/filter_reads_on_modification_thresholds.py +2 -0
  139. smftools/preprocessing/flag_duplicate_reads.py +42 -54
  140. smftools/preprocessing/make_dirs.py +2 -1
  141. smftools/preprocessing/min_non_diagonal.py +2 -0
  142. smftools/preprocessing/recipes.py +2 -0
  143. smftools/readwrite.py +53 -17
  144. smftools/schema/anndata_schema_v1.yaml +15 -1
  145. smftools/tools/__init__.py +30 -18
  146. smftools/tools/archived/apply_hmm.py +2 -0
  147. smftools/tools/archived/classifiers.py +2 -0
  148. smftools/tools/archived/classify_methylated_features.py +2 -0
  149. smftools/tools/archived/classify_non_methylated_features.py +2 -0
  150. smftools/tools/archived/subset_adata_v1.py +2 -0
  151. smftools/tools/archived/subset_adata_v2.py +2 -0
  152. smftools/tools/calculate_leiden.py +57 -0
  153. smftools/tools/calculate_nmf.py +119 -0
  154. smftools/tools/calculate_umap.py +93 -8
  155. smftools/tools/cluster_adata_on_methylation.py +7 -1
  156. smftools/tools/position_stats.py +17 -27
  157. smftools/tools/rolling_nn_distance.py +235 -0
  158. smftools/tools/tensor_factorization.py +169 -0
  159. {smftools-0.2.5.dist-info → smftools-0.3.1.dist-info}/METADATA +69 -33
  160. smftools-0.3.1.dist-info/RECORD +189 -0
  161. smftools-0.2.5.dist-info/RECORD +0 -181
  162. {smftools-0.2.5.dist-info → smftools-0.3.1.dist-info}/WHEEL +0 -0
  163. {smftools-0.2.5.dist-info → smftools-0.3.1.dist-info}/entry_points.txt +0 -0
  164. {smftools-0.2.5.dist-info → smftools-0.3.1.dist-info}/licenses/LICENSE +0 -0
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  import subprocess
2
4
 
3
5
 
@@ -1,23 +1,134 @@
1
+ from __future__ import annotations
2
+
1
3
  import concurrent.futures
2
4
  import os
5
+ import shutil
6
+ import subprocess
3
7
  from concurrent.futures import ProcessPoolExecutor
4
8
  from pathlib import Path
9
+ from typing import TYPE_CHECKING
5
10
 
6
- import matplotlib.pyplot as plt
7
11
  import numpy as np
8
12
  import pandas as pd
9
- import pybedtools
10
- import pyBigWig
11
- import pysam
12
13
 
13
14
  from smftools.logging_utils import get_logger
15
+ from smftools.optional_imports import require
14
16
 
15
17
  from ..readwrite import make_dirs
16
18
 
17
19
  logger = get_logger(__name__)
18
20
 
19
-
20
- def _bed_to_bigwig(fasta: str, bed: str) -> str:
21
+ if TYPE_CHECKING:
22
+ import pybedtools as pybedtools_types
23
+ import pyBigWig as pybigwig_types
24
+ import pysam as pysam_types
25
+
26
+ try:
27
+ import pybedtools
28
+ except Exception:
29
+ pybedtools = None # type: ignore
30
+
31
+ try:
32
+ import pyBigWig
33
+ except Exception:
34
+ pyBigWig = None # type: ignore
35
+
36
+ try:
37
+ import pysam
38
+ except Exception:
39
+ pysam = None # type: ignore
40
+
41
+
42
def _require_pybedtools() -> "pybedtools_types":
    """Return the ``pybedtools`` module for the bedtools Python backend.

    The module-level ``pybedtools`` name is ``None`` when the eager import at
    the top of this file failed. In that case the lookup is delegated to
    ``require`` (presumably it imports the package or raises an informative
    error -- confirm against ``smftools.optional_imports``).
    """
    if pybedtools is None:
        return require("pybedtools", extra="pybedtools", purpose="bedtools Python backend")
    return pybedtools
46
+
47
+
48
def _require_pybigwig() -> "pybigwig_types":
    """Return the ``pyBigWig`` module for the BigWig Python backend.

    The module-level ``pyBigWig`` name is ``None`` when the eager import at
    the top of this file failed. In that case the lookup is delegated to
    ``require`` (presumably it imports the package or raises an informative
    error -- confirm against ``smftools.optional_imports``).
    """
    if pyBigWig is None:
        return require("pyBigWig", extra="pybigwig", purpose="BigWig Python backend")
    return pyBigWig
52
+
53
+
54
def _require_pysam() -> "pysam_types":
    """Return the ``pysam`` module.

    The module-level ``pysam`` name is ``None`` when the eager import at the
    top of this file failed. In that case the lookup is delegated to
    ``require`` (presumably it imports the package or raises an informative
    error -- confirm against ``smftools.optional_imports``).
    """
    if pysam is None:
        return require("pysam", extra="pysam", purpose="FASTA indexing")
    return pysam
58
+
59
+
60
def _resolve_backend(
    backend: str | None, *, tool: str, python_available: bool, cli_name: str
) -> str:
    """Choose between the Python binding and the command-line tool.

    Parameters
    ----------
    backend
        Requested backend: ``"auto"``, ``"python"`` or ``"cli"``. Matching is
        case-insensitive and ignores surrounding whitespace. ``None`` or an
        empty string is treated as ``"auto"``.
    tool
        Short tool name, used only to build error messages.
    python_available
        Whether the Python package for this tool could be imported.
    cli_name
        Executable looked up on ``PATH`` via ``shutil.which``.

    Returns
    -------
    str
        ``"python"`` or ``"cli"``.

    Raises
    ------
    ValueError
        If ``backend`` is not one of the three accepted values.
    RuntimeError
        If the requested backend, or in auto mode any backend, is unavailable.
    """
    requested = (backend if backend else "auto").strip().lower()
    if requested not in ("auto", "python", "cli"):
        raise ValueError(f"{tool}_backend must be one of: auto, python, cli")

    if requested == "python":
        if python_available:
            return "python"
        raise RuntimeError(
            f"{tool}_backend=python requires the Python package to be installed."
        )

    # Both remaining modes ("cli" and "auto") need to know about the executable.
    cli_found = shutil.which(cli_name) is not None

    if requested == "cli":
        if cli_found:
            return "cli"
        raise RuntimeError(f"{tool}_backend=cli requires {cli_name} in PATH.")

    # Auto mode: the executable wins over the Python binding when both exist.
    if cli_found:
        return "cli"
    if python_available:
        return "python"
    raise RuntimeError(f"Neither Python nor CLI backend is available for {tool}.")
81
+
82
+
83
def _read_chrom_sizes(chrom_sizes: Path) -> list[tuple[str, int]]:
    """Read a whitespace-separated ``<chrom> <size>`` table.

    Only the first two columns of each line are used; extra columns are
    ignored. Blank and whitespace-only lines (for example a trailing newline
    at the end of the file) are skipped instead of aborting the parse.

    Parameters
    ----------
    chrom_sizes
        Path to the chromosome sizes file.

    Returns
    -------
    list[tuple[str, int]]
        ``(chromosome, length)`` pairs in file order.

    Raises
    ------
    ValueError
        If a non-blank line has fewer than two columns or a non-integer size.
    """
    sizes: list[tuple[str, int]] = []
    with chrom_sizes.open() as f:
        for lineno, line in enumerate(f, start=1):
            fields = line.split()
            if not fields:
                # Tolerate empty lines rather than failing on tuple unpacking.
                continue
            if len(fields) < 2:
                raise ValueError(
                    f"{chrom_sizes}:{lineno}: expected '<chrom> <size>', got {line.rstrip()!r}"
                )
            sizes.append((fields[0], int(fields[1])))
    return sizes
90
+
91
+
92
def _ensure_fasta_index(fasta: Path) -> Path:
    """Return the path of the ``.fai`` index for ``fasta``, creating it if absent.

    The index path is the FASTA path with ``.fai`` appended to its suffix. An
    existing index is reused as-is. Otherwise ``samtools faidx`` is run when
    ``samtools`` is on ``PATH``, and ``pysam.faidx`` is used as the fallback.

    Raises
    ------
    RuntimeError
        If ``samtools faidx`` exits non-zero, or if neither ``samtools`` nor
        ``pysam`` is available.
    """
    index_path = fasta.with_suffix(f"{fasta.suffix}.fai")
    if not index_path.exists():
        if shutil.which("samtools"):
            result = subprocess.run(
                ["samtools", "faidx", str(fasta)],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.PIPE,
                text=True,
            )
            exit_code = result.returncode
            if exit_code != 0:
                raise RuntimeError(f"samtools faidx failed (exit {exit_code}):\n{result.stderr}")
        elif pysam is not None:
            _require_pysam().faidx(str(fasta))
        else:
            raise RuntimeError("FASTA indexing requires pysam or samtools in PATH.")
    return index_path
111
+
112
+
113
def _ensure_chrom_sizes(fasta: Path) -> Path:
    """Return a ``<chrom>\\t<size>`` file derived from the FASTA index.

    The output path is ``fasta`` with its last suffix replaced by
    ``.chrom.sizes``. An existing file is reused. Otherwise the first two
    columns of the ``.fai`` index are copied into it.

    The table is written to a per-process temporary file and renamed into
    place. Because later calls trust the file purely on existence, writing it
    in place would let an interrupted or concurrent run leave a truncated
    table that is then reused indefinitely.

    Raises
    ------
    RuntimeError
        Propagated from ``_ensure_fasta_index`` when the index cannot be built.
    ValueError
        If a non-blank index line has fewer than two columns.
    """
    fai = _ensure_fasta_index(fasta)
    chrom_sizes = fasta.with_suffix(".chrom.sizes")
    if chrom_sizes.exists():
        return chrom_sizes

    # The PID in the name keeps parallel workers from sharing a temp file.
    tmp = chrom_sizes.with_name(f"{chrom_sizes.name}.{os.getpid()}.tmp")
    try:
        with fai.open() as f_in, tmp.open("w") as out:
            for line in f_in:
                fields = line.split()
                if not fields:
                    # Skip blank lines instead of failing on tuple unpacking.
                    continue
                chrom, size = fields[:2]
                out.write(f"{chrom}\t{size}\n")
        # Path.replace is an atomic rename on POSIX within one directory.
        tmp.replace(chrom_sizes)
    finally:
        # Only present if the write or rename failed part-way.
        if tmp.exists():
            tmp.unlink()
    return chrom_sizes
123
+
124
+
125
+ def _bed_to_bigwig(
126
+ fasta: str,
127
+ bed: str,
128
+ *,
129
+ bedtools_backend: str | None = "auto",
130
+ bigwig_backend: str | None = "auto",
131
+ ) -> str:
21
132
  """
22
133
  BED → bedGraph → bigWig
23
134
  Requires:
@@ -28,40 +139,70 @@ def _bed_to_bigwig(fasta: str, bed: str) -> str:
28
139
  fa = Path(fasta) # path to .fa
29
140
  parent = bed.parent
30
141
  stem = bed.stem
31
- fa_stem = fa.stem
32
- fai = parent / f"{fa_stem}.fai"
142
+ chrom_sizes = _ensure_chrom_sizes(fa)
33
143
 
34
144
  bedgraph = parent / f"{stem}.bedgraph"
35
145
  bigwig = parent / f"{stem}.bw"
36
146
 
37
147
  # 1) Compute coverage → bedGraph
38
- logger.debug(f"[pybedtools] generating coverage bedgraph from {bed}")
39
- bt = pybedtools.BedTool(str(bed))
40
- # bedtools genomecov -bg
41
- coverage = bt.genome_coverage(bg=True, genome=str(fai))
42
- coverage.saveas(str(bedgraph))
148
+ bedtools_choice = _resolve_backend(
149
+ bedtools_backend,
150
+ tool="bedtools",
151
+ python_available=pybedtools is not None,
152
+ cli_name="bedtools",
153
+ )
154
+ if bedtools_choice == "python":
155
+ logger.debug(f"[pybedtools] generating coverage bedgraph from {bed}")
156
+ pybedtools_mod = _require_pybedtools()
157
+ bt = pybedtools_mod.BedTool(str(bed))
158
+ # bedtools genomecov -bg
159
+ coverage = bt.genome_coverage(bg=True, genome=str(chrom_sizes))
160
+ coverage.saveas(str(bedgraph))
161
+ else:
162
+ if not shutil.which("bedtools"):
163
+ raise RuntimeError("bedtools is required but not available in PATH.")
164
+ cmd = [
165
+ "bedtools",
166
+ "genomecov",
167
+ "-i",
168
+ str(bed),
169
+ "-g",
170
+ str(chrom_sizes),
171
+ "-bg",
172
+ ]
173
+ logger.debug("[bedtools] generating coverage bedgraph: %s", " ".join(cmd))
174
+ with bedgraph.open("w") as out:
175
+ cp = subprocess.run(cmd, stdout=out, stderr=subprocess.PIPE, text=True)
176
+ if cp.returncode != 0:
177
+ raise RuntimeError(f"bedtools genomecov failed (exit {cp.returncode}):\n{cp.stderr}")
43
178
 
44
179
  # 2) Convert bedGraph → BigWig via pyBigWig
45
- logger.debug(f"[pyBigWig] converting bedgraph → bigwig: {bigwig}")
46
-
47
- # read chrom sizes from the FASTA .fai index
48
- chrom_sizes = {}
49
- with open(fai) as f:
50
- for line in f:
51
- fields = line.strip().split("\t")
52
- chrom = fields[0]
53
- size = int(fields[1])
54
- chrom_sizes[chrom] = size
55
-
56
- bw = pyBigWig.open(str(bigwig), "w")
57
- bw.addHeader(list(chrom_sizes.items()))
58
-
59
- with open(bedgraph) as f:
60
- for line in f:
61
- chrom, start, end, coverage = line.strip().split()
62
- bw.addEntries(chrom, int(start), ends=int(end), values=float(coverage))
63
-
64
- bw.close()
180
+ bigwig_choice = _resolve_backend(
181
+ bigwig_backend,
182
+ tool="bigwig",
183
+ python_available=pyBigWig is not None,
184
+ cli_name="bedGraphToBigWig",
185
+ )
186
+ if bigwig_choice == "python":
187
+ logger.debug(f"[pyBigWig] converting bedgraph → bigwig: {bigwig}")
188
+ pybigwig_mod = _require_pybigwig()
189
+ bw = pybigwig_mod.open(str(bigwig), "w")
190
+ bw.addHeader(_read_chrom_sizes(chrom_sizes))
191
+
192
+ with bedgraph.open() as f:
193
+ for line in f:
194
+ chrom, start, end, coverage = line.strip().split()
195
+ bw.addEntries(chrom, int(start), ends=int(end), values=float(coverage))
196
+
197
+ bw.close()
198
+ else:
199
+ if not shutil.which("bedGraphToBigWig"):
200
+ raise RuntimeError("bedGraphToBigWig is required but not available in PATH.")
201
+ cmd = ["bedGraphToBigWig", str(bedgraph), str(chrom_sizes), str(bigwig)]
202
+ logger.debug("[bedGraphToBigWig] converting bedgraph → bigwig: %s", " ".join(cmd))
203
+ cp = subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, text=True)
204
+ if cp.returncode != 0:
205
+ raise RuntimeError(f"bedGraphToBigWig failed (exit {cp.returncode}):\n{cp.stderr}")
65
206
 
66
207
  logger.debug(f"BigWig written: {bigwig}")
67
208
  return str(bigwig)
@@ -113,6 +254,8 @@ def _plot_bed_histograms(
113
254
  coordinate_mode : {"one_based","zero_based"}
114
255
  One-based, inclusive (your file) vs BED-standard zero-based, half-open.
115
256
  """
257
+ plt = require("matplotlib.pyplot", extra="plotting", purpose="plotting BED histograms")
258
+
116
259
  os.makedirs(plotting_directory, exist_ok=True)
117
260
 
118
261
  bed_basename = os.path.basename(bed_file).rsplit(".bed", 1)[0]
@@ -167,7 +310,8 @@ def _plot_bed_histograms(
167
310
  return np.clip(x, lo, hi)
168
311
 
169
312
  # Load chromosome order/lengths from FASTA
170
- with pysam.FastaFile(fasta) as fa:
313
+ pysam_mod = _require_pysam()
314
+ with pysam_mod.FastaFile(fasta) as fa:
171
315
  ref_names = list(fa.references)
172
316
  ref_lengths = dict(zip(ref_names, fa.lengths))
173
317
 
@@ -292,7 +436,17 @@ def _plot_bed_histograms(
292
436
  logger.debug("[plot_bed_histograms] Done.")
293
437
 
294
438
 
295
- def aligned_BAM_to_bed(aligned_BAM, out_dir, fasta, make_bigwigs, threads=None):
439
+ def aligned_BAM_to_bed(
440
+ aligned_BAM,
441
+ out_dir,
442
+ fasta,
443
+ make_bigwigs,
444
+ threads=None,
445
+ *,
446
+ samtools_backend: str | None = "auto",
447
+ bedtools_backend: str | None = "auto",
448
+ bigwig_backend: str | None = "auto",
449
+ ):
296
450
  """
297
451
  Takes an aligned BAM as input and writes a BED file of reads as output.
298
452
  Bed columns are: Record name, start position, end position, read length, read name, mapping quality, read quality.
@@ -318,31 +472,79 @@ def aligned_BAM_to_bed(aligned_BAM, out_dir, fasta, make_bigwigs, threads=None):
318
472
 
319
473
  logger.debug(f"Creating BED-like file from BAM (with MAPQ and avg base quality): {aligned_BAM}")
320
474
 
321
- with pysam.AlignmentFile(aligned_BAM, "rb") as bam, open(bed_output, "w") as out:
322
- for read in bam.fetch(until_eof=True):
323
- if read.is_unmapped:
324
- chrom = "*"
325
- start1 = 1
326
- rl = read.query_length or 0
327
- mapq = 0
328
- else:
329
- chrom = bam.get_reference_name(read.reference_id)
330
- # pysam reference_start is 0-based → +1 for 1-based SAM-like start
331
- start1 = int(read.reference_start) + 1
332
- rl = read.query_length or 0
333
- mapq = int(read.mapping_quality)
334
-
335
- # End position in 1-based inclusive coords
336
- end1 = start1 + (rl or 0) - 1
337
-
338
- qname = read.query_name
339
- quals = read.query_qualities
340
- if quals is None or rl == 0:
341
- avg_q = float("nan")
342
- else:
343
- avg_q = float(np.mean(quals))
344
-
345
- out.write(f"{chrom}\t{start1}\t{end1}\t{rl}\t{qname}\t{mapq}\t{avg_q:.3f}\n")
475
+ backend_choice = _resolve_backend(
476
+ samtools_backend,
477
+ tool="samtools",
478
+ python_available=pysam is not None,
479
+ cli_name="samtools",
480
+ )
481
+ with open(bed_output, "w") as out:
482
+ if backend_choice == "python":
483
+ pysam_mod = _require_pysam()
484
+ with pysam_mod.AlignmentFile(aligned_BAM, "rb") as bam:
485
+ for read in bam.fetch(until_eof=True):
486
+ if read.is_unmapped:
487
+ chrom = "*"
488
+ start1 = 1
489
+ rl = read.query_length or 0
490
+ mapq = 0
491
+ else:
492
+ chrom = bam.get_reference_name(read.reference_id)
493
+ # pysam reference_start is 0-based → +1 for 1-based SAM-like start
494
+ start1 = int(read.reference_start) + 1
495
+ rl = read.query_length or 0
496
+ mapq = int(read.mapping_quality)
497
+
498
+ # End position in 1-based inclusive coords
499
+ end1 = start1 + (rl or 0) - 1
500
+
501
+ qname = read.query_name
502
+ quals = read.query_qualities
503
+ if quals is None or rl == 0:
504
+ avg_q = float("nan")
505
+ else:
506
+ avg_q = float(np.mean(quals))
507
+
508
+ out.write(f"{chrom}\t{start1}\t{end1}\t{rl}\t{qname}\t{mapq}\t{avg_q:.3f}\n")
509
+ else:
510
+ samtools_view = subprocess.Popen(
511
+ ["samtools", "view", str(aligned_BAM)],
512
+ stdout=subprocess.PIPE,
513
+ stderr=subprocess.PIPE,
514
+ text=True,
515
+ )
516
+ assert samtools_view.stdout is not None
517
+ for line in samtools_view.stdout:
518
+ if not line.strip():
519
+ continue
520
+ fields = line.rstrip("\n").split("\t")
521
+ if len(fields) < 11:
522
+ continue
523
+ qname = fields[0]
524
+ flag = int(fields[1])
525
+ chrom = fields[2]
526
+ pos = int(fields[3])
527
+ mapq = int(fields[4])
528
+ seq = fields[9]
529
+ qual = fields[10]
530
+ rl = 0 if seq == "*" else len(seq)
531
+ is_unmapped = bool(flag & 0x4) or chrom == "*"
532
+ if is_unmapped:
533
+ chrom = "*"
534
+ start1 = 1
535
+ mapq = 0
536
+ else:
537
+ start1 = pos
538
+ end1 = start1 + (rl or 0) - 1
539
+ if qual == "*" or rl == 0:
540
+ avg_q = float("nan")
541
+ else:
542
+ avg_q = float(np.mean([ord(ch) - 33 for ch in qual]))
543
+ out.write(f"{chrom}\t{start1}\t{end1}\t{rl}\t{qname}\t{mapq}\t{avg_q:.3f}\n")
544
+ rc = samtools_view.wait()
545
+ if rc != 0:
546
+ stderr = samtools_view.stderr.read() if samtools_view.stderr else ""
547
+ raise RuntimeError(f"samtools view failed (exit {rc}):\n{stderr}")
346
548
 
347
549
  logger.debug(f"BED-like file created: {bed_output}")
348
550
 
@@ -368,7 +570,15 @@ def aligned_BAM_to_bed(aligned_BAM, out_dir, fasta, make_bigwigs, threads=None):
368
570
  futures = []
369
571
  futures.append(executor.submit(_plot_bed_histograms, aligned_bed, plotting_dir, fasta))
370
572
  if make_bigwigs:
371
- futures.append(executor.submit(_bed_to_bigwig, fasta, aligned_bed))
573
+ futures.append(
574
+ executor.submit(
575
+ _bed_to_bigwig,
576
+ fasta,
577
+ aligned_bed,
578
+ bedtools_backend=bedtools_backend,
579
+ bigwig_backend=bigwig_backend,
580
+ )
581
+ )
372
582
  concurrent.futures.wait(futures)
373
583
 
374
584
  logger.debug("Processing completed successfully.")
@@ -1,3 +1,6 @@
1
+ from __future__ import annotations
2
+
3
+
1
4
  def binarize_converted_base_identities(
2
5
  base_identities,
3
6
  strand,
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  # complement_base_list
2
4
 
3
5