levseq 1.3.2__tar.gz → 1.3.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {levseq-1.3.2/levseq.egg-info → levseq-1.3.3}/PKG-INFO +5 -1
- {levseq-1.3.2 → levseq-1.3.3}/README.md +5 -1
- {levseq-1.3.2 → levseq-1.3.3}/levseq/__init__.py +2 -2
- {levseq-1.3.2 → levseq-1.3.3}/levseq/barcoding/demultiplex-arm64 +0 -0
- levseq-1.3.3/levseq/barcoding/demultiplex-x86 +0 -0
- levseq-1.3.3/levseq/filter_orientation.py +115 -0
- {levseq-1.3.2 → levseq-1.3.3}/levseq/run_levseq.py +17 -2
- {levseq-1.3.2 → levseq-1.3.3/levseq.egg-info}/PKG-INFO +5 -1
- {levseq-1.3.2 → levseq-1.3.3}/levseq.egg-info/SOURCES.txt +1 -0
- levseq-1.3.2/levseq/barcoding/demultiplex-x86 +0 -0
- {levseq-1.3.2 → levseq-1.3.3}/LICENSE +0 -0
- {levseq-1.3.2 → levseq-1.3.3}/MANIFEST.in +0 -0
- {levseq-1.3.2 → levseq-1.3.3}/levseq/IO_processor.py +0 -0
- {levseq-1.3.2 → levseq-1.3.3}/levseq/barcoding/__init__.py +0 -0
- {levseq-1.3.2 → levseq-1.3.3}/levseq/barcoding/demultiplex +0 -0
- {levseq-1.3.2 → levseq-1.3.3}/levseq/barcoding/minion_barcodes.fasta +0 -0
- {levseq-1.3.2 → levseq-1.3.3}/levseq/basecaller.py +0 -0
- {levseq-1.3.2 → levseq-1.3.3}/levseq/cmd.py +0 -0
- {levseq-1.3.2 → levseq-1.3.3}/levseq/coordinates.py +0 -0
- {levseq-1.3.2 → levseq-1.3.3}/levseq/globals.py +0 -0
- {levseq-1.3.2 → levseq-1.3.3}/levseq/interface.py +0 -0
- {levseq-1.3.2 → levseq-1.3.3}/levseq/parser.py +0 -0
- {levseq-1.3.2 → levseq-1.3.3}/levseq/screen.py +0 -0
- {levseq-1.3.2 → levseq-1.3.3}/levseq/seqfit.py +0 -0
- {levseq-1.3.2 → levseq-1.3.3}/levseq/simulation.py +0 -0
- {levseq-1.3.2 → levseq-1.3.3}/levseq/user.py +0 -0
- {levseq-1.3.2 → levseq-1.3.3}/levseq/utils.py +0 -0
- {levseq-1.3.2 → levseq-1.3.3}/levseq/variantcaller.py +0 -0
- {levseq-1.3.2 → levseq-1.3.3}/levseq/visualization.py +0 -0
- {levseq-1.3.2 → levseq-1.3.3}/levseq.egg-info/dependency_links.txt +0 -0
- {levseq-1.3.2 → levseq-1.3.3}/levseq.egg-info/entry_points.txt +0 -0
- {levseq-1.3.2 → levseq-1.3.3}/levseq.egg-info/requires.txt +0 -0
- {levseq-1.3.2 → levseq-1.3.3}/levseq.egg-info/top_level.txt +0 -0
- {levseq-1.3.2 → levseq-1.3.3}/setup.cfg +0 -0
- {levseq-1.3.2 → levseq-1.3.3}/setup.py +0 -0
- {levseq-1.3.2 → levseq-1.3.3}/tests/test_demultiplex_docker.py +0 -0
- {levseq-1.3.2 → levseq-1.3.3}/tests/test_deploy.py +0 -0
- {levseq-1.3.2 → levseq-1.3.3}/tests/test_opligopools.py +0 -0
- {levseq-1.3.2 → levseq-1.3.3}/tests/test_seqfitvis.py +0 -0
- {levseq-1.3.2 → levseq-1.3.3}/tests/test_seqs.py +0 -0
- {levseq-1.3.2 → levseq-1.3.3}/tests/test_statistics.py +0 -0
- {levseq-1.3.2 → levseq-1.3.3}/tests/test_variant_calling.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: levseq
|
|
3
|
-
Version: 1.3.2
|
|
3
|
+
Version: 1.3.3
|
|
4
4
|
Home-page: https://github.com/fhalab/levseq/
|
|
5
5
|
Author: Yueming Long, Ariane Mora, Francesca-Zhoufan Li, Emre Gursoy
|
|
6
6
|
Author-email: ylong@caltech.edu
|
|
@@ -87,6 +87,10 @@ conda create --name levseq python=3.12 -y
|
|
|
87
87
|
conda activate levseq
|
|
88
88
|
```
|
|
89
89
|
|
|
90
|
+
```
|
|
91
|
+
pip install levseq
|
|
92
|
+
```
|
|
93
|
+
|
|
90
94
|
#### Dependencies
|
|
91
95
|
|
|
92
96
|
1. Samtools: https://www.htslib.org/download/
|
|
@@ -40,6 +40,10 @@ conda create --name levseq python=3.12 -y
|
|
|
40
40
|
conda activate levseq
|
|
41
41
|
```
|
|
42
42
|
|
|
43
|
+
```
|
|
44
|
+
pip install levseq
|
|
45
|
+
```
|
|
46
|
+
|
|
43
47
|
#### Dependencies
|
|
44
48
|
|
|
45
49
|
1. Samtools: https://www.htslib.org/download/
|
|
@@ -138,4 +142,4 @@ If you have found LevSeq useful, please cite our [paper](https://pubs.acs.org/do
|
|
|
138
142
|
|
|
139
143
|
#### Contact
|
|
140
144
|
|
|
141
|
-
Leave a feature request in the issues or reach us via [email](mailto:levseqdb@gmail.com).
|
|
145
|
+
Leave a feature request in the issues or reach us via [email](mailto:levseqdb@gmail.com).
|
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
__title__ = 'levseq'
|
|
19
19
|
__description__ = 'LevSeq nanopore sequencing'
|
|
20
20
|
__url__ = 'https://github.com/fhalab/levseq/'
|
|
21
|
-
__version__ = '1.3.2'
|
|
21
|
+
__version__ = '1.3.3'
|
|
22
22
|
__author__ = 'Yueming Long, Ariane Mora, Francesca-Zhoufan Li, Emre Gursoy'
|
|
23
23
|
__author_email__ = 'ylong@caltech.edu'
|
|
24
24
|
__license__ = 'GPL3'
|
|
@@ -31,4 +31,4 @@ from levseq.cmd import *
|
|
|
31
31
|
from levseq.utils import *
|
|
32
32
|
from levseq.simulation import *
|
|
33
33
|
from levseq.user import *
|
|
34
|
-
|
|
34
|
+
from levseq.filter_orientation import *
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
from Bio import SeqIO
|
|
2
|
+
from Bio.Seq import Seq
|
|
3
|
+
import os
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
import logging
|
|
6
|
+
from Bio.Align import PairwiseAligner
|
|
7
|
+
import shutil
|
|
8
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
9
|
+
from tqdm import tqdm
|
|
10
|
+
|
|
11
|
+
def calculate_alignment_score(seq1, seq2):
    """Return the length-normalised global alignment score of two sequences.

    The score is computed with Biopython's ``PairwiseAligner`` in global mode
    (default scoring parameters) and divided by the length of the longer
    sequence.

    Args:
        seq1: First sequence (e.g. a ``str``).
        seq2: Second sequence (e.g. a ``str``).

    Returns:
        float: ``score / max(len(seq1), len(seq2))``, or ``0.0`` when either
        sequence is empty.
    """
    # Two empty sequences would make the normalisation below divide by zero,
    # and Biopython is expected to reject zero-length input anyway, so an
    # empty sequence is reported as "no similarity".
    if len(seq1) == 0 or len(seq2) == 0:
        return 0.0

    aligner = PairwiseAligner()
    aligner.mode = 'global'
    # Only the optimal score is needed; ``score`` avoids constructing the
    # alignment object that ``align(...)[0]`` would build just to read it.
    return aligner.score(seq1, seq2) / max(len(seq1), len(seq2))
|
|
17
|
+
|
|
18
|
+
def filter_single_file(args):
    """
    Filter a single fastq file by read orientation. Used for parallel processing.

    Each read is scored against the parent sequence and against its reverse
    complement. Reads from a "forward" file are kept when they score higher
    against the reverse complement; reads from any other file are kept when
    they score higher against the forward parent. Ties are discarded.

    Args:
        args: tuple containing (input_file, parent_seq, parent_rev_comp)
    Returns:
        tuple: (file_path, total_reads, kept_reads, filtered_records) where
        file_path is ``str(input_file)``, total_reads and kept_reads are
        counts, and filtered_records is the list of kept records.
    """
    input_file, parent_seq, parent_rev_comp = args

    # Orientation is taken from the file name only, so a parent directory
    # that happens to contain "forward" cannot mislabel every file below it.
    is_forward = "forward" in Path(input_file).name.lower()

    # The reference strings are identical for every read; convert them once.
    forward_ref = str(parent_seq)
    reverse_ref = str(parent_rev_comp)

    kept_reads = []
    total_reads = 0

    for record in SeqIO.parse(input_file, "fastq"):
        total_reads += 1
        seq = str(record.seq)

        forward_score = calculate_alignment_score(seq, forward_ref)
        reverse_score = calculate_alignment_score(seq, reverse_ref)

        if is_forward:
            # Forward file (plate barcode was rev comp): the read should
            # align to the reverse complement of the parent sequence.
            keep = reverse_score > forward_score
        else:
            # Reverse file (plate barcode was forward): the read was already
            # reverse complemented by the demultiplexer, so it should align
            # to the forward parent sequence.
            keep = forward_score > reverse_score

        if keep:
            kept_reads.append(record)

    return str(input_file), total_reads, len(kept_reads), kept_reads
|
|
54
|
+
|
|
55
|
+
def filter_demultiplexed_folder(experiment_folder, parent_sequence, num_threads=8):
    """
    Filter demultiplexed files using multiple threads.

    Every ``*.fastq`` file found under ``<experiment_folder>/RB*/NB*/`` is
    passed to ``filter_single_file`` and then overwritten in place with only
    the reads that were kept.

    Args:
        experiment_folder (str): Path to experiment folder containing RBC/FBC structure
        parent_sequence (str): Parent sequence for alignment checking
        num_threads (int): Number of threads to use

    Returns:
        dict: Maps each successfully processed file path (str) to a dict with
        the keys 'total', 'kept' and 'filtered'. Files that failed are logged
        and left out; the dict is empty when no fastq files are found.
    """
    exp_path = Path(experiment_folder)
    filtered_counts = {}

    # Collect all fastq files
    fastq_files = []
    for rbc_dir in exp_path.glob("RB*"):
        if not rbc_dir.is_dir():
            continue
        for fbc_dir in rbc_dir.glob("NB*"):
            if not fbc_dir.is_dir():
                continue
            fastq_files.extend(fbc_dir.glob("*.fastq"))

    if not fastq_files:
        logging.warning(f"No fastq files found in {experiment_folder}")
        return filtered_counts

    # Prepare parent sequences once
    parent_seq = Seq(parent_sequence)
    parent_rev_comp = parent_seq.reverse_complement()

    # Process files in parallel with progress bar
    # NOTE(review): the per-read alignment is CPU-bound; threads only give a
    # speed-up if Biopython's aligner releases the GIL - confirm.
    with ThreadPoolExecutor(max_workers=num_threads) as executor:
        # Remember which file each future belongs to so that a failure can be
        # attributed to the right file.
        future_to_file = {
            executor.submit(
                filter_single_file, (fastq, parent_seq, parent_rev_comp)
            ): str(fastq)
            for fastq in fastq_files
        }

        with tqdm(total=len(fastq_files), desc="Filtering files") as pbar:
            for future in as_completed(future_to_file):
                file_path = future_to_file[future]
                temp_file = Path(file_path).parent / f"temp_{Path(file_path).name}"
                try:
                    _, total, kept, filtered_records = future.result()

                    # Write filtered reads to a temporary file first, then
                    # move it over the original.
                    SeqIO.write(filtered_records, temp_file, "fastq")
                    shutil.move(str(temp_file), file_path)

                    filtered_counts[file_path] = {
                        'total': total,
                        'kept': kept,
                        'filtered': total - kept
                    }

                    logging.info(f"Processed {file_path}: {kept}/{total} reads kept")

                except Exception as e:
                    # Best-effort cleanup: a leftover "temp_*.fastq" would be
                    # picked up as an input file by the glob on a later run.
                    try:
                        temp_file.unlink()
                    except OSError:
                        pass
                    logging.error(f"Error processing file {file_path}: {str(e)}")
                finally:
                    pbar.update(1)

    return filtered_counts
|
|
@@ -17,7 +17,7 @@
|
|
|
17
17
|
|
|
18
18
|
# Import MinION objects
|
|
19
19
|
from levseq import *
|
|
20
|
-
|
|
20
|
+
from levseq.filter_orientation import filter_demultiplexed_folder
|
|
21
21
|
# Import external packages
|
|
22
22
|
import logging
|
|
23
23
|
from pathlib import Path
|
|
@@ -472,8 +472,23 @@ def process_ref_csv(cl_args, tqdm_fn=tqdm.tqdm):
|
|
|
472
472
|
file_to_fastq = cat_fastq_files(cl_args.get("path"), output_dir)
|
|
473
473
|
try:
|
|
474
474
|
demux_fastq(output_dir, name_folder, barcode_path)
|
|
475
|
+
|
|
476
|
+
# Add filtering step here with multithreading
|
|
477
|
+
filtered_counts = filter_demultiplexed_folder(
|
|
478
|
+
name_folder,
|
|
479
|
+
refseq,
|
|
480
|
+
num_threads=10
|
|
481
|
+
)
|
|
482
|
+
logging.info(f"Orientation filtering completed for {name}")
|
|
483
|
+
total_reads = sum(counts['total'] for counts in filtered_counts.values())
|
|
484
|
+
kept_reads = sum(counts['kept'] for counts in filtered_counts.values())
|
|
485
|
+
logging.info(f"Total filtering results: {kept_reads}/{total_reads} reads kept ({kept_reads/total_reads*100:.2f}%)")
|
|
486
|
+
for file, counts in filtered_counts.items():
|
|
487
|
+
logging.info(f"{file}: {counts['kept']}/{counts['total']} reads kept")
|
|
488
|
+
|
|
489
|
+
|
|
475
490
|
except Exception as e:
|
|
476
|
-
logging.error("An error occurred during demultiplexing for sample {}. Skipping this sample.".format(name), exc_info=True)
|
|
491
|
+
logging.error("An error occurred during demultiplexing/filtering for sample {}. Skipping this sample.".format(name), exc_info=True)
|
|
477
492
|
continue
|
|
478
493
|
|
|
479
494
|
if not cl_args["skip_variantcalling"]:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: levseq
|
|
3
|
-
Version: 1.3.2
|
|
3
|
+
Version: 1.3.3
|
|
4
4
|
Home-page: https://github.com/fhalab/levseq/
|
|
5
5
|
Author: Yueming Long, Ariane Mora, Francesca-Zhoufan Li, Emre Gursoy
|
|
6
6
|
Author-email: ylong@caltech.edu
|
|
@@ -87,6 +87,10 @@ conda create --name levseq python=3.12 -y
|
|
|
87
87
|
conda activate levseq
|
|
88
88
|
```
|
|
89
89
|
|
|
90
|
+
```
|
|
91
|
+
pip install levseq
|
|
92
|
+
```
|
|
93
|
+
|
|
90
94
|
#### Dependencies
|
|
91
95
|
|
|
92
96
|
1. Samtools: https://www.htslib.org/download/
|
|
Binary file
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|