krewlyzer 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
krewlyzer/uxm.py ADDED
@@ -0,0 +1,188 @@
1
+ import typer
2
+ from pathlib import Path
3
+ from typing import Optional
4
+ import logging
5
+ import pysam
6
+
7
+ import numpy as np
8
+ from rich.console import Console
9
+ from rich.logging import RichHandler
10
+ from concurrent.futures import ProcessPoolExecutor, as_completed
11
+ import os
12
+
13
+ console = Console()
14
+ logging.basicConfig(level="INFO", handlers=[RichHandler(console=console)], format="%(message)s")
15
+ logger = logging.getLogger("uxm")
16
+
17
def calc_uxm(
    bam_file: Path,
    mark_file: Path,
    output_file: Path,
    map_quality: int,
    min_cpg: int,
    methy_threshold: float,
    unmethy_threshold: float,
    pe_type: str = "PE"
) -> None:
    """
    Calculate UXM fragment-level methylation for a single BAM and marker file.

    Fragments overlapping each marker region are classified from their
    Bismark-style XM tag ('Z' = methylated CpG, 'z' = unmethylated CpG):
    M if the methylated ratio >= methy_threshold, U if <= unmethy_threshold,
    X otherwise. Fragments with fewer than min_cpg informative CpGs are
    skipped. Output is a .UXM.tsv file with region, U, X, M proportions.

    Args:
        bam_file: Input BAM. If no .bai index exists it is sorted in place
            and indexed first.
        mark_file: BED file of marker regions.
        output_file: Path of the output TSV.
        map_quality: Minimum mapping quality for a read/pair to count.
        min_cpg: Minimum informative CpG count per fragment.
        methy_threshold: Ratio at/above which a fragment is classed M.
        unmethy_threshold: Ratio at/below which a fragment is classed U.
        pe_type: "PE" (paired-end) or "SE" (single-end).

    Raises:
        typer.Exit: on invalid pe_type or any fatal error.
    """
    try:
        # BUG FIX: pybedtools was referenced below but never imported anywhere
        # in this module, so every call raised NameError. Import lazily so the
        # module itself stays importable without pybedtools installed.
        import pybedtools
        # Hoisted out of the per-region loop (was re-imported every iteration).
        from krewlyzer.helpers import read_pair_generator

        bai = str(bam_file) + ".bai"
        if not os.path.exists(bai):
            # Sort in place, then index, so pysam.fetch can random-access regions.
            pysam.sort("-o", str(bam_file), str(bam_file))
            pysam.index(str(bam_file))
            logger.warning(f"Index file {bai} did not exist. Sorted and indexed BAM.")

        input_file = pysam.AlignmentFile(str(bam_file))
        marks = pybedtools.BedTool(str(mark_file))
        res = []
        for mark in marks:
            region = f"{mark.chrom}:{mark.start}-{mark.end}"
            try:
                # Probe fetch: raises ValueError when the contig is absent
                # from the BAM header.
                input_file.fetch(mark.chrom, mark.start, mark.end)
            except ValueError:
                res.append(f"{region}\t0\t0\t0")
                continue
            Ufragment = 0
            Xfragment = 0
            Mfragment = 0
            if pe_type == "PE":
                for read1, read2 in read_pair_generator(input_file, region):
                    if read1 is None or read2 is None:
                        continue
                    if read1.mapping_quality < map_quality or read2.mapping_quality < map_quality:
                        continue
                    try:
                        m1 = read1.get_tag("XM")
                        m2 = read2.get_tag("XM")
                    except KeyError:
                        # No methylation call string on this pair.
                        continue
                    read1Start = read1.reference_start
                    read1End = read1.reference_end
                    read2Start = read2.reference_start
                    read2End = read2.reference_end
                    # cfDNAFE overlap logic: trim the overlapping prefix of the
                    # downstream mate so shared CpGs are not counted twice.
                    # NOTE(review): the trim length is computed in reference
                    # coordinates but applied to the XM string (query
                    # coordinates); these differ when the alignment contains
                    # indels — confirm against cfDNAFE upstream.
                    if not read1.is_reverse:  # read1 forward, read2 reverse
                        if read2Start < read1End:
                            overlap = read1End - read2Start
                            num_methylated = m1.count("Z") + m2[overlap:].count("Z")
                            num_unmethylated = m1.count("z") + m2[overlap:].count("z")
                        else:
                            num_methylated = m1.count("Z") + m2.count("Z")
                            num_unmethylated = m1.count("z") + m2.count("z")
                    else:  # read1 reverse, read2 forward
                        if read1Start < read2End:
                            overlap = read2End - read1Start
                            num_methylated = m2.count("Z") + m1[overlap:].count("Z")
                            num_unmethylated = m2.count("z") + m1[overlap:].count("z")
                        else:
                            num_methylated = m1.count("Z") + m2.count("Z")
                            num_unmethylated = m1.count("z") + m2.count("z")
                    if num_methylated + num_unmethylated < min_cpg:
                        continue
                    ratio = num_methylated / (num_methylated + num_unmethylated)
                    if ratio >= methy_threshold:
                        Mfragment += 1
                    elif ratio <= unmethy_threshold:
                        Ufragment += 1
                    else:
                        Xfragment += 1
            elif pe_type == "SE":
                for read in input_file.fetch(mark.chrom, mark.start, mark.end):
                    if read.mapping_quality < map_quality:
                        continue
                    try:
                        m = read.get_tag("XM")
                    except KeyError:
                        continue
                    num_methylated = m.count("Z")
                    num_unmethylated = m.count("z")
                    if num_methylated + num_unmethylated < min_cpg:
                        continue
                    ratio = num_methylated / (num_methylated + num_unmethylated)
                    if ratio >= methy_threshold:
                        Mfragment += 1
                    elif ratio <= unmethy_threshold:
                        Ufragment += 1
                    else:
                        Xfragment += 1
            else:
                logger.error("type must be SE or PE")
                raise typer.Exit(1)
            total = Mfragment + Ufragment + Xfragment
            if total == 0:
                res.append(f"{region}\t0\t0\t0")
            else:
                # Output column order is U, X, M (matches the header below).
                fractions = (Ufragment / total, Xfragment / total, Mfragment / total)
                res.append(f"{region}\t" + "\t".join(map(str, fractions)))
        with open(output_file, 'w') as f:
            f.write('region\tU\tX\tM\n')
            for i in res:
                f.write(i + '\n')
        logger.info(f"UXM calculation complete for {bam_file}. Results in {output_file}.")
    except typer.Exit:
        # BUG FIX: deliberate exits (bad pe_type) were previously swallowed by
        # the blanket handler below and double-logged as a fatal error.
        raise
    except Exception as e:
        logger.error(f"Fatal error in calc_uxm: {e}")
        raise typer.Exit(1)
132
+
133
def uxm(
    bam_path: Path = typer.Argument(..., help="Folder containing .bam files for UXM calculation."),
    mark_input: Optional[Path] = typer.Option(None, "--mark-input", "-m", help="Marker BED file (default: packaged atlas)", show_default=False),
    output: Path = typer.Option(..., "--output", "-o", help="Output folder for results"),
    map_quality: int = typer.Option(30, "--map-quality", "-q", help="Minimum mapping quality"),
    min_cpg: int = typer.Option(4, "--min-cpg", "-c", help="Minimum CpG count per fragment"),
    methy_threshold: float = typer.Option(0.75, "--methy-threshold", "-tM", help="Methylation threshold for M fragments"),
    unmethy_threshold: float = typer.Option(0.25, "--unmethy-threshold", "-tU", help="Unmethylation threshold for U fragments"),
    pe_type: str = typer.Option("SE", "--type", help="Fragment type: SE or PE (default: SE)"),
    threads: int = typer.Option(1, "--threads", "-t", help="Number of parallel processes (default: 1)")
) -> None:
    """
    Calculate fragment-level methylation (UXM) features for all BAM files in a folder.

    One <sample>.UXM.tsv file is written to `output` per input BAM. Failures
    of individual files are logged and do not abort the remaining files.
    """
    # Input checks
    if not bam_path.exists():
        logger.error(f"Input BAM directory not found: {bam_path}")
        raise typer.Exit(1)
    if mark_input and not mark_input.exists():
        logger.error(f"Marker BED file not found: {mark_input}")
        raise typer.Exit(1)
    try:
        output.mkdir(parents=True, exist_ok=True)
    except Exception as e:
        logger.error(f"Could not create output directory {output}: {e}")
        raise typer.Exit(1)
    if mark_input is None:
        # Fall back to the marker atlas bundled with the package.
        pkg_dir = Path(__file__).parent
        mark_input = pkg_dir / "data/MethMark/Atlas.U25.l4.hg19.bed"
    # sorted() gives a deterministic processing/reporting order.
    bam_files = sorted(Path(bam_path).glob("*.bam"))
    # BUG FIX: the original submitted a locally-defined closure to
    # ProcessPoolExecutor. Local functions cannot be pickled, so every task
    # failed before running. Submit the module-level calc_uxm directly.
    with ProcessPoolExecutor(max_workers=threads) as executor:
        futures = {}
        for bam_file in bam_files:
            # Path.stem already strips the ".bam" suffix.
            output_file = output / f"{bam_file.stem}.UXM.tsv"
            future = executor.submit(
                calc_uxm,
                bam_file,
                mark_input,
                output_file,
                map_quality,
                min_cpg,
                methy_threshold,
                unmethy_threshold,
                pe_type,
            )
            futures[future] = (bam_file, output_file)
        for future in as_completed(futures):
            bam_file, output_file = futures[future]
            try:
                future.result()
                logger.info(f"UXM calculated: {output_file}")
            except Exception as exc:
                logger.error(f"UXM calculation failed for {bam_file}: {exc}")
    logger.info(f"UXM features calculated for {len(bam_files)} files.")
krewlyzer/wps.py ADDED
@@ -0,0 +1,264 @@
1
+ import typer
2
+ from pathlib import Path
3
+ import logging
4
+ import pysam
5
+
6
+ import numpy as np
7
+ from collections import defaultdict
8
+ import gzip
9
+ from rich.console import Console
10
+ from rich.logging import RichHandler
11
+
12
+ from .helpers import max_core, commonError
13
+
14
+ console = Console()
15
+ logging.basicConfig(level="INFO", handlers=[RichHandler(console=console)], format="%(message)s")
16
+ logger = logging.getLogger("wps")
17
+
18
+
19
+ import pandas as pd
20
+
21
def _calc_wps(
    bedgz_input: str | Path,
    tsv_input: str | Path,
    output_file_pattern: str,
    empty: bool = False,
    protect_input: int = 120,
    min_size: int = 120,
    max_size: int = 180
):
    """
    Calculate Windowed Protection Score (WPS) for a single .bed.gz file and
    transcript region file.

    For each region line of `tsv_input` (whitespace-separated: id, chrom,
    start, end, strand; 1-based inclusive coordinates), fragments of length
    [min_size, max_size] are fetched from the tabix-indexed fragment BED and
    three per-position tracks are accumulated: coverage, fragment-endpoint
    counts, and WPS = spanning - (overlapping - spanning) for the +/-
    protection half-window. One gzipped TSV per region is written to
    `output_file_pattern % region_id` with columns chrom, pos, cov, starts,
    wps; rows are reversed for '-' strand regions.

    Args:
        bedgz_input: bgzip-compressed, tabix-indexed fragment BED file.
        tsv_input: region file (TSV, >= 5 columns per line).
        output_file_pattern: printf-style pattern with one %s for region id.
        empty: if True, also write regions with zero coverage.
        protect_input: full protection window size; half is used per side.
        min_size: minimum fragment length (inclusive).
        max_size: maximum fragment length (inclusive).

    Raises:
        typer.Exit: on any fatal error.
    """
    tbx = None
    try:
        bedgzfile = str(bedgz_input)
        tbx = pysam.TabixFile(bedgzfile)
        protection = protect_input // 2  # half-window on each side

        logger.info(f"Processing {bedgz_input} with regions from {tsv_input}")

        with open(tsv_input, 'r') as infile:
            # Autosomes 1-22 plus X/Y, without the 'chr' prefix.
            valid_chroms = set(map(str, list(range(1, 23)) + ["X", "Y"]))

            for line in infile:
                if not line.strip():
                    continue
                parts = line.split()
                if len(parts) < 5:
                    continue
                cid, chrom, start_str, end_str, strand = parts[:5]
                chrom = chrom.replace("chr", "")
                if chrom not in valid_chroms:
                    continue

                region_start = int(float(start_str))
                region_end = int(float(end_str))

                if region_start < 1:
                    continue

                # Region length (1-based inclusive coordinates).
                length = region_end - region_start + 1

                # Per-position accumulators, index 0 == region_start.
                cov_arr = np.zeros(length, dtype=int)
                start_arr = np.zeros(length, dtype=int)
                gcount_arr = np.zeros(length, dtype=int)
                total_arr = np.zeros(length, dtype=int)

                # Fetch reads. pysam fetch is 0-based half-open:
                # 1-based [S, E] -> 0-based [S-1, E). Extend by `protection`
                # on both sides so window edges see all relevant fragments.
                fetch_start = max(0, region_start - protection - 1)
                fetch_end = region_end + protection

                # Ensure 'chr' prefix for fetching as per original logic.
                fetch_chrom = "chr" + chrom if not chrom.startswith("chr") else chrom

                try:
                    rows = list(tbx.fetch(fetch_chrom, fetch_start, fetch_end, parser=pysam.asTuple()))
                except ValueError:
                    # Retry without the 'chr' prefix (contig naming differs
                    # between references).
                    try:
                        rows = list(tbx.fetch(chrom, fetch_start, fetch_end, parser=pysam.asTuple()))
                    except ValueError:
                        rows = []
                except Exception as e:
                    logger.error(f"Error fetching region {chrom}:{region_start}-{region_end}: {e}")
                    continue

                if not rows:
                    # Skip fragment-less regions unless `empty` requests
                    # all-zero output files for them.
                    if not empty:
                        continue

                # Accumulate each fragment into the per-position tracks.
                for row in rows:
                    # BED is 0-based start, exclusive end.
                    rstart = int(row[1])
                    rend = int(row[2])
                    lseq = rend - rstart

                    if lseq < min_size or lseq > max_size:
                        continue

                    # Convert fragment to 1-based inclusive to match regions.
                    r_start_1 = rstart + 1
                    r_end_1 = rend

                    # 1. Coverage: positions inside both fragment and region.
                    ov_start = max(region_start, r_start_1)
                    ov_end = min(region_end, r_end_1)

                    if ov_start <= ov_end:
                        idx_start = ov_start - region_start
                        idx_end = ov_end - region_start + 1
                        cov_arr[idx_start:idx_end] += 1

                    # 2. Endpoint counts (both fragment ends).
                    if region_start <= r_start_1 <= region_end:
                        start_arr[r_start_1 - region_start] += 1
                    if region_start <= r_end_1 <= region_end:
                        start_arr[r_end_1 - region_start] += 1

                    # 3. WPS inputs.
                    # gcount (spanning): window [k-P, k+P] lies inside the
                    # fragment, i.e. k in [r_start_1 + P, r_end_1 - P].
                    g_start = r_start_1 + protection
                    g_end = r_end_1 - protection

                    g_ov_start = max(region_start, g_start)
                    g_ov_end = min(region_end, g_end)

                    if g_ov_start <= g_ov_end:
                        idx_start = g_ov_start - region_start
                        idx_end = g_ov_end - region_start + 1
                        gcount_arr[idx_start:idx_end] += 1

                    # total (overlapping): window [k-P, k+P] overlaps the
                    # fragment, i.e. k in [r_start_1 - P, r_end_1 + P].
                    t_start = r_start_1 - protection
                    t_end = r_end_1 + protection

                    t_ov_start = max(region_start, t_start)
                    t_ov_end = min(region_end, t_end)

                    if t_ov_start <= t_ov_end:
                        idx_start = t_ov_start - region_start
                        idx_end = t_ov_end - region_start + 1
                        total_arr[idx_start:idx_end] += 1

                # WPS = Spanning - (Total - Spanning) = 2 * Spanning - Total
                wps_arr = 2 * gcount_arr - total_arr

                # Skip zero-coverage regions unless `empty` is set.
                if np.sum(cov_arr) == 0 and not empty:
                    continue

                # Prepare output for this region.
                filename = output_file_pattern % cid

                positions = np.arange(region_start, region_end + 1)
                df = pd.DataFrame({
                    'chrom': chrom,
                    'pos': positions,
                    'cov': cov_arr,
                    'starts': start_arr,
                    'wps': wps_arr
                })

                # Minus-strand regions are reported 3'->5' (reversed rows).
                if strand == "-":
                    df = df.iloc[::-1]

                with gzip.open(filename, 'wt') as outfile:
                    for _, row in df.iterrows():
                        outfile.write(f"{row['chrom']}\t{int(row['pos'])}\t{int(row['cov'])}\t{int(row['starts'])}\t{int(row['wps'])}\n")

        logger.info(f"WPS calculation complete. Results written to pattern: {output_file_pattern}")
    except Exception as e:
        logger.error(f"Fatal error in _calc_wps: {e}")
        raise typer.Exit(1)
    finally:
        # BUG FIX: the TabixFile handle was never closed, leaking a file
        # descriptor per call (one per worker task).
        if tbx is not None:
            tbx.close()
184
+
185
+
186
def wps(
    bedgz_path: Path = typer.Argument(..., help="Folder containing .bed.gz files (should be the output directory from motif.py)"),
    tsv_input: Path = typer.Option(None, "--tsv-input", "-t", help="Path to transcript/region file (TSV format)"),
    output: Path = typer.Option(..., "--output", "-o", help="Output folder for results"),
    wpstype: str = typer.Option('L', "--wpstype", "-w", help="WPS type: 'L' for long (default), 'S' for short"),
    empty: bool = typer.Option(False, "--empty", help="Keep files of empty blocks (default: False)"),
    threads: int = typer.Option(1, "--threads", "-p", help="Number of threads (default: 1)")
):
    """
    Calculate Windowed Protection Score (WPS) features for all .bed.gz files in a folder.

    Each input produces one gzipped per-region table in `output`, named
    <sample>.<region-id>.WPS.tsv.gz. Per-file failures are logged without
    aborting the remaining files.
    """
    from concurrent.futures import ProcessPoolExecutor, as_completed

    # Input checks
    if not bedgz_path.exists():
        logger.error(f"Input directory not found: {bedgz_path}")
        raise typer.Exit(1)
    if tsv_input and not tsv_input.exists():
        logger.error(f"Transcript region file not found: {tsv_input}")
        raise typer.Exit(1)
    try:
        output.mkdir(parents=True, exist_ok=True)
    except Exception as e:
        logger.error(f"Could not create output directory {output}: {e}")
        raise typer.Exit(1)
    try:
        # NOTE(review): touch() on an existing directory only updates its
        # mtime, so this is a weak writability probe — confirm intent.
        output.touch()
    except Exception as e:
        logger.error(f"Output directory {output} is not writable: {e}")
        raise typer.Exit(1)
    try:
        bedgz_files = list(Path(bedgz_path).glob("*.bed.gz"))
        if not bedgz_files:
            logger.error("No .bed.gz files found in the specified folder.")
            raise typer.Exit(1)
        if tsv_input is None:
            # Default to package data transcriptAnno-hg19-1kb.tsv
            tsv_input = Path(__file__).parent.parent / "data" / "TranscriptAnno" / "transcriptAnno-hg19-1kb.tsv"
            logger.info(f"No tsv_input specified. Using default: {tsv_input}")
        if not tsv_input.exists():
            logger.error(f"Transcript/region file does not exist: {tsv_input}")
            raise typer.Exit(1)
        # Parameter presets: 'L' = long fragments, anything else = short.
        if wpstype == 'L':
            protect_input, min_size, max_size = 120, 120, 180
        else:
            protect_input, min_size, max_size = 16, 35, 80
        n_procs = max_core(threads) if threads else 1
        logger.info(f"Calculating WPS for {len(bedgz_files)} files using {n_procs} processes...")
        # BUG FIX: the original submitted a locally-defined closure
        # (wps_task) to ProcessPoolExecutor. Local functions cannot be
        # pickled, so every worker task failed before running. Submit the
        # module-level _calc_wps directly instead.
        with ProcessPoolExecutor(max_workers=n_procs) as executor:
            futures = {}
            for bedgz_file in bedgz_files:
                # <sample>.bed.gz -> <sample>.%s.WPS.tsv.gz (%s = region id).
                pattern = str(output / (bedgz_file.stem.replace('.bed', '') + ".%s.WPS.tsv.gz"))
                future = executor.submit(
                    _calc_wps,
                    bedgz_input=str(bedgz_file),
                    tsv_input=str(tsv_input),
                    output_file_pattern=pattern,
                    empty=empty,
                    protect_input=protect_input,
                    min_size=min_size,
                    max_size=max_size,
                )
                futures[future] = bedgz_file
            for future in as_completed(futures):
                try:
                    future.result()
                except Exception as exc:
                    logger.error(f"WPS calculation failed for {futures[future]}: {exc}")
        logger.info(f"WPS features calculated for {len(bedgz_files)} files.")
    except typer.Exit:
        # BUG FIX: deliberate exits were previously caught by the blanket
        # handler below and re-logged as "Fatal error in wps CLI: 1".
        raise
    except Exception as e:
        logger.error(f"Fatal error in wps CLI: {e}")
        raise typer.Exit(1)
krewlyzer/wrapper.py ADDED
@@ -0,0 +1,147 @@
1
+ import typer
2
+ from pathlib import Path
3
+ import logging
4
+ from rich.console import Console
5
+ from rich.logging import RichHandler
6
+ from .motif import motif
7
+ from .fsc import fsc
8
+ from .fsr import fsr
9
+ from .fsd import fsd
10
+ from .wps import wps
11
+ from .ocf import ocf
12
+ from .uxm import uxm
13
+ from .mfsd import mfsd
14
+ from typing import Optional
15
+
16
+ console = Console()
17
+ logging.basicConfig(level="INFO", handlers=[RichHandler(console=console)], format="%(message)s")
18
+ logger = logging.getLogger("krewlyzer-wrapper")
19
+
20
+
21
def run_all(
    bam_file: Path = typer.Argument(..., help="Input BAM file (sorted, indexed)"),
    reference: Path = typer.Option(..., "--reference", "-g", help="Reference genome FASTA file for motif extraction"),
    output: Path = typer.Option(..., "--output", "-o", help="Output directory for all results"),
    variant_input: Optional[Path] = typer.Option(None, "--variant-input", "-v", help="Input VCF/MAF file for mFSD analysis"),
    threads: int = typer.Option(1, "--threads", "-t", help="Number of parallel processes for each step"),
    pe_type: str = typer.Option("SE", "--type", help="Fragment type for UXM: SE or PE (default: SE)")
):
    """
    Run all feature extraction commands (motif, fsc, fsr, fsd, wps, ocf, uxm, mfsd)
    for a single BAM file, writing each step's results to its own subfolder
    of `output`.

    NOTE(review): the steps below invoke typer command functions as plain
    Python calls. Any parameter left to its default would receive a
    typer.OptionInfo object instead of a real value, so defaults must be
    spelled out explicitly at each call site — confirm the fsc/fsr/fsd/ocf/
    mfsd signatures are fully specified here.
    """
    # Input checks
    if not bam_file.exists() or not bam_file.is_file():
        logger.error(f"Input BAM file not found: {bam_file}")
        raise typer.Exit(1)
    if not reference.exists() or not reference.is_file():
        logger.error(f"Reference FASTA file not found: {reference}")
        raise typer.Exit(1)
    try:
        output.mkdir(parents=True, exist_ok=True)
    except Exception as e:
        logger.error(f"Could not create output directory {output}: {e}")
        raise typer.Exit(1)
    # 1. Motif extraction (produces the .bed.gz files consumed by later steps)
    motif_output = output / "motif"
    try:
        motif(
            bam_path=bam_file,
            reference=reference,
            output=motif_output,
            minlen=65,
            maxlen=400,
            k=3,
            verbose=True,
            threads=threads,
        )
    except Exception as e:
        logger.error(f"Motif extraction failed: {e}")
        raise typer.Exit(1)
    # 2. FSC
    fsc_output = output / "fsc"
    try:
        fsc(
            bedgz_path=motif_output,
            output=fsc_output,
            threads=threads
        )
    except Exception as e:
        logger.error(f"FSC calculation failed: {e}")
        raise typer.Exit(1)
    # 3. FSR
    fsr_output = output / "fsr"
    try:
        fsr(
            bedgz_path=motif_output,
            output=fsr_output,
            threads=threads
        )
    except Exception as e:
        logger.error(f"FSR calculation failed: {e}")
        raise typer.Exit(1)
    # 4. FSD
    fsd_output = output / "fsd"
    try:
        fsd(
            bedgz_path=motif_output,
            output=fsd_output,
            arms_file=None,
            threads=threads
        )
    except Exception as e:
        logger.error(f"FSD calculation failed: {e}")
        raise typer.Exit(1)
    # 5. WPS
    wps_output = output / "wps"
    try:
        # BUG FIX: tsv_input/wpstype/empty were previously left to their
        # typer defaults (OptionInfo objects), which crashed inside wps()
        # on `tsv_input.exists()`. Pass real values explicitly.
        wps(
            bedgz_path=motif_output,
            tsv_input=None,
            output=wps_output,
            wpstype='L',
            empty=False,
            threads=threads
        )
    except Exception as e:
        logger.error(f"WPS calculation failed: {e}")
        raise typer.Exit(1)
    # 6. OCF
    ocf_output = output / "ocf"
    try:
        ocf(
            bedgz_path=motif_output,
            output=ocf_output,
            threads=threads
        )
    except Exception as e:
        logger.error(f"OCF calculation failed: {e}")
        raise typer.Exit(1)
    # 7. UXM
    uxm_output = output / "uxm"
    try:
        # BUG FIX: mark_input and the threshold options were previously left
        # to their typer defaults (OptionInfo objects), which crashed inside
        # uxm() on `mark_input.exists()`. Pass real values explicitly,
        # matching the CLI defaults of the uxm command.
        uxm(
            bam_path=bam_file.parent,  # uxm processes every BAM in the folder
            mark_input=None,
            output=uxm_output,
            map_quality=30,
            min_cpg=4,
            methy_threshold=0.75,
            unmethy_threshold=0.25,
            pe_type=pe_type,
            threads=threads
        )
    except Exception as e:
        logger.error(f"UXM calculation failed: {e}")
        raise typer.Exit(1)

    # 8. mFSD (optional — only when a variant file was supplied)
    if variant_input:
        if not variant_input.exists():
            logger.warning(f"Variant input file not found: {variant_input}. Skipping mFSD.")
        else:
            mfsd_output = output / "mfsd" / (bam_file.stem + ".mfsd.tsv")
            # Ensure the mfsd subfolder exists before handing the file path on.
            mfsd_output.parent.mkdir(parents=True, exist_ok=True)
            try:
                mfsd(
                    bam_path=bam_file,
                    input_file=variant_input,
                    output=mfsd_output,
                    format="auto",
                    map_quality=20
                )
            except Exception as e:
                logger.error(f"mFSD calculation failed: {e}")
                # Don't raise exit here, just log error as it's optional

    logger.info(f"All feature extraction complete. Results saved to {output}")
@@ -0,0 +1,22 @@
1
+ Metadata-Version: 2.4
2
+ Name: krewlyzer
3
+ Version: 0.1.4
4
+ Summary: Feature extraction tools for circulating tumor DNA from GRCh37 aligned BAM files
5
+ Author-email: Ronak Shah <shahr2@mskcc.org>
6
+ Project-URL: Homepage, https://github.com/msk-access/krewlyzer
7
+ Requires-Python: >=3.8
8
+ License-File: LICENSE
9
+ Requires-Dist: typer>=0.12.3
10
+ Requires-Dist: pysam>=0.20.0
11
+ Requires-Dist: pandas>=2.0.0
12
+ Requires-Dist: biopython>=1.81
13
+ Requires-Dist: scikit-misc>=0.1.4
14
+ Requires-Dist: scipy>=1.10.0
15
+ Requires-Dist: rich>=13.0.0
16
+ Provides-Extra: docs
17
+ Requires-Dist: mkdocs>=1.5.0; extra == "docs"
18
+ Requires-Dist: mkdocs-material>=9.5.0; extra == "docs"
19
+ Provides-Extra: test
20
+ Requires-Dist: pytest>=7.0.0; extra == "test"
21
+ Requires-Dist: pytest-mock>=3.10.0; extra == "test"
22
+ Dynamic: license-file
@@ -0,0 +1,18 @@
1
+ krewlyzer/__init__.py,sha256=trkZFdHJ1IoLrxZb8PVQe2wSGn_ojPODNqHbjLSL1_c,90
2
+ krewlyzer/cli.py,sha256=4a_gl59n4RguPMFsRi7OZ0-uhAfk7PFDOgFdUvD1a1A,1553
3
+ krewlyzer/fsc.py,sha256=O3VZz-lY-s603Pq-94txL49vBSbxSMPIxOmVykUvHS8,14548
4
+ krewlyzer/fsd.py,sha256=u91sb8WLNv6t0DBwg_SwFL-RLaWPGNb2f4LrBKzh0wQ,6857
5
+ krewlyzer/fsr.py,sha256=ee5ZijYjUAdIvs2TVeBvmm6qQwUCp6XjenVQUSeXCBQ,9411
6
+ krewlyzer/helpers.py,sha256=NA7YBfIRYKj_ZdjiwG96yOyd8IC03VPgBBokfj1a1_g,8540
7
+ krewlyzer/mfsd.py,sha256=C7xAaTPSU-KOL0Mvu5nYBugfCidOVN9YWaFZmDfe0Uw,9451
8
+ krewlyzer/motif.py,sha256=6xCocdFAn0BBssxWzXhGxHac0OHvVKRg6ovQQ-XLr_c,20610
9
+ krewlyzer/ocf.py,sha256=cSIy8ax-EWVONarFZgvunIexbP2q39oY66R0i9Xd9-0,5832
10
+ krewlyzer/uxm.py,sha256=LOyEXCDQimucTTwl2A95dfHJDFyKK0S6MpREpFnOVXI,8482
11
+ krewlyzer/wps.py,sha256=7-u_VJ2kdYfvq-CSuCrCTRoRjlIRMJopB5K0jAr1G-E,11279
12
+ krewlyzer/wrapper.py,sha256=V6BvaxlHl1z7JBMtQC1AteNrTQ9oYf2hHIleje3Cb-M,4715
13
+ krewlyzer-0.1.4.dist-info/licenses/LICENSE,sha256=DuJF49YfFt6g7la7cekWI06XA7ImodNiTEOrEBsOkpk,32365
14
+ krewlyzer-0.1.4.dist-info/METADATA,sha256=Za75HoZckHD7LjPZmbrzI3lckh0YPFztei4E4R9uxiI,767
15
+ krewlyzer-0.1.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
16
+ krewlyzer-0.1.4.dist-info/entry_points.txt,sha256=x9Wngsqelv0MxiUvgDDx3hVNR-MhDbASxwKpNeBiX8I,48
17
+ krewlyzer-0.1.4.dist-info/top_level.txt,sha256=bFO6hK-X3pxPGZ7ewoOVx80u9p_Npyy9NW8XophctdY,10
18
+ krewlyzer-0.1.4.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ krewlyzer = krewlyzer.cli:app