levseq 1.3.2__tar.gz → 1.3.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. {levseq-1.3.2/levseq.egg-info → levseq-1.3.3}/PKG-INFO +5 -1
  2. {levseq-1.3.2 → levseq-1.3.3}/README.md +5 -1
  3. {levseq-1.3.2 → levseq-1.3.3}/levseq/__init__.py +2 -2
  4. {levseq-1.3.2 → levseq-1.3.3}/levseq/barcoding/demultiplex-arm64 +0 -0
  5. levseq-1.3.3/levseq/barcoding/demultiplex-x86 +0 -0
  6. levseq-1.3.3/levseq/filter_orientation.py +115 -0
  7. {levseq-1.3.2 → levseq-1.3.3}/levseq/run_levseq.py +17 -2
  8. {levseq-1.3.2 → levseq-1.3.3/levseq.egg-info}/PKG-INFO +5 -1
  9. {levseq-1.3.2 → levseq-1.3.3}/levseq.egg-info/SOURCES.txt +1 -0
  10. levseq-1.3.2/levseq/barcoding/demultiplex-x86 +0 -0
  11. {levseq-1.3.2 → levseq-1.3.3}/LICENSE +0 -0
  12. {levseq-1.3.2 → levseq-1.3.3}/MANIFEST.in +0 -0
  13. {levseq-1.3.2 → levseq-1.3.3}/levseq/IO_processor.py +0 -0
  14. {levseq-1.3.2 → levseq-1.3.3}/levseq/barcoding/__init__.py +0 -0
  15. {levseq-1.3.2 → levseq-1.3.3}/levseq/barcoding/demultiplex +0 -0
  16. {levseq-1.3.2 → levseq-1.3.3}/levseq/barcoding/minion_barcodes.fasta +0 -0
  17. {levseq-1.3.2 → levseq-1.3.3}/levseq/basecaller.py +0 -0
  18. {levseq-1.3.2 → levseq-1.3.3}/levseq/cmd.py +0 -0
  19. {levseq-1.3.2 → levseq-1.3.3}/levseq/coordinates.py +0 -0
  20. {levseq-1.3.2 → levseq-1.3.3}/levseq/globals.py +0 -0
  21. {levseq-1.3.2 → levseq-1.3.3}/levseq/interface.py +0 -0
  22. {levseq-1.3.2 → levseq-1.3.3}/levseq/parser.py +0 -0
  23. {levseq-1.3.2 → levseq-1.3.3}/levseq/screen.py +0 -0
  24. {levseq-1.3.2 → levseq-1.3.3}/levseq/seqfit.py +0 -0
  25. {levseq-1.3.2 → levseq-1.3.3}/levseq/simulation.py +0 -0
  26. {levseq-1.3.2 → levseq-1.3.3}/levseq/user.py +0 -0
  27. {levseq-1.3.2 → levseq-1.3.3}/levseq/utils.py +0 -0
  28. {levseq-1.3.2 → levseq-1.3.3}/levseq/variantcaller.py +0 -0
  29. {levseq-1.3.2 → levseq-1.3.3}/levseq/visualization.py +0 -0
  30. {levseq-1.3.2 → levseq-1.3.3}/levseq.egg-info/dependency_links.txt +0 -0
  31. {levseq-1.3.2 → levseq-1.3.3}/levseq.egg-info/entry_points.txt +0 -0
  32. {levseq-1.3.2 → levseq-1.3.3}/levseq.egg-info/requires.txt +0 -0
  33. {levseq-1.3.2 → levseq-1.3.3}/levseq.egg-info/top_level.txt +0 -0
  34. {levseq-1.3.2 → levseq-1.3.3}/setup.cfg +0 -0
  35. {levseq-1.3.2 → levseq-1.3.3}/setup.py +0 -0
  36. {levseq-1.3.2 → levseq-1.3.3}/tests/test_demultiplex_docker.py +0 -0
  37. {levseq-1.3.2 → levseq-1.3.3}/tests/test_deploy.py +0 -0
  38. {levseq-1.3.2 → levseq-1.3.3}/tests/test_opligopools.py +0 -0
  39. {levseq-1.3.2 → levseq-1.3.3}/tests/test_seqfitvis.py +0 -0
  40. {levseq-1.3.2 → levseq-1.3.3}/tests/test_seqs.py +0 -0
  41. {levseq-1.3.2 → levseq-1.3.3}/tests/test_statistics.py +0 -0
  42. {levseq-1.3.2 → levseq-1.3.3}/tests/test_variant_calling.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: levseq
3
- Version: 1.3.2
3
+ Version: 1.3.3
4
4
  Home-page: https://github.com/fhalab/levseq/
5
5
  Author: Yueming Long, Ariane Mora, Francesca-Zhoufan Li, Emre Gursoy
6
6
  Author-email: ylong@caltech.edu
@@ -87,6 +87,10 @@ conda create --name levseq python=3.12 -y
87
87
  conda activate levseq
88
88
  ```
89
89
 
90
+ ```
91
+ pip install levseq
92
+ ```
93
+
90
94
  #### Dependencies
91
95
 
92
96
  1. Samtools: https://www.htslib.org/download/
@@ -40,6 +40,10 @@ conda create --name levseq python=3.12 -y
40
40
  conda activate levseq
41
41
  ```
42
42
 
43
+ ```
44
+ pip install levseq
45
+ ```
46
+
43
47
  #### Dependencies
44
48
 
45
49
  1. Samtools: https://www.htslib.org/download/
@@ -138,4 +142,4 @@ If you have found LevSeq useful, please cite our [paper](https://pubs.acs.org/do
138
142
 
139
143
  #### Contact
140
144
 
141
- Leave a feature request in the issues or reach us via [email](mailto:levseqdb@gmail.com).
145
+ Leave a feature request in the issues or reach us via [email](mailto:levseqdb@gmail.com).
@@ -18,7 +18,7 @@
18
18
  __title__ = 'levseq'
19
19
  __description__ = 'LevSeq nanopore sequencing'
20
20
  __url__ = 'https://github.com/fhalab/levseq/'
21
- __version__ = '1.3.2'
21
+ __version__ = '1.3.3'
22
22
  __author__ = 'Yueming Long, Ariane Mora, Francesca-Zhoufan Li, Emre Gursoy'
23
23
  __author_email__ = 'ylong@caltech.edu'
24
24
  __license__ = 'GPL3'
@@ -31,4 +31,4 @@ from levseq.cmd import *
31
31
  from levseq.utils import *
32
32
  from levseq.simulation import *
33
33
  from levseq.user import *
34
-
34
+ from levseq.filter_orientation import *
@@ -0,0 +1,115 @@
1
+ from Bio import SeqIO
2
+ from Bio.Seq import Seq
3
+ import os
4
+ from pathlib import Path
5
+ import logging
6
+ from Bio.Align import PairwiseAligner
7
+ import shutil
8
+ from concurrent.futures import ThreadPoolExecutor, as_completed
9
+ from tqdm import tqdm
10
+
11
def calculate_alignment_score(seq1, seq2):
    """Return the global alignment score of two sequences, normalised by length.

    Args:
        seq1: First sequence (string or Biopython ``Seq``).
        seq2: Second sequence (string or Biopython ``Seq``).

    Returns:
        float: Optimal global alignment score, using the scoring of a
        default-constructed ``PairwiseAligner``, divided by the length of the
        longer sequence. ``0.0`` when either sequence is empty.
    """
    # Nothing meaningful can be aligned against an empty sequence, and two
    # empty sequences would divide by zero below, so report "no similarity".
    if min(len(seq1), len(seq2)) == 0:
        return 0.0

    aligner = PairwiseAligner()
    aligner.mode = 'global'
    # Only the score is needed: ``score()`` yields the same optimum as
    # ``align(...)[0].score`` without constructing an alignment object.
    return aligner.score(seq1, seq2) / max(len(seq1), len(seq2))
17
+
18
def filter_single_file(args):
    """
    Filter a single fastq file by read orientation. Used for parallel processing.

    Each read is scored against the parent sequence and against its reverse
    complement. For a file whose name contains "forward" the read is kept
    only if the reverse-complement score is strictly higher; for any other
    file it is kept only if the forward score is strictly higher. Reads with
    equal scores are dropped.

    Args:
        args: tuple containing (input_file, parent_seq, parent_rev_comp)
    Returns:
        tuple: (file_path, total_reads, kept_reads, filtered_records)
    """
    input_file, parent_seq, parent_rev_comp = args

    # Convert the two references once instead of once per read.
    forward_ref = str(parent_seq)
    reverse_ref = str(parent_rev_comp)

    # Look at the file name only: matching against the whole path would
    # treat every file as "forward" whenever some parent directory happens
    # to contain that word.
    is_forward = "forward" in Path(input_file).name.lower()

    kept_reads = []
    total_reads = 0

    for record in SeqIO.parse(input_file, "fastq"):
        total_reads += 1
        seq = str(record.seq)

        forward_score = calculate_alignment_score(seq, forward_ref)
        reverse_score = calculate_alignment_score(seq, reverse_ref)

        if is_forward:
            # Forward file (plate barcode was rev comp):
            # the read should align to the reverse complement parent sequence.
            keep = reverse_score > forward_score
        else:
            # Reverse file (plate barcode was forward):
            # the read was already reverse complemented by the demultiplexer,
            # so it should align to the forward parent sequence.
            keep = forward_score > reverse_score

        if keep:
            kept_reads.append(record)

    return str(input_file), total_reads, len(kept_reads), kept_reads
54
+
55
def filter_demultiplexed_folder(experiment_folder, parent_sequence, num_threads=8):
    """
    Filter demultiplexed files using multiple threads.

    Every ``*.fastq`` file found under ``<experiment_folder>/RB*/NB*/`` is
    passed to ``filter_single_file`` and then overwritten in place with the
    reads that were kept. A file whose processing fails is logged and left
    out of the returned statistics; the remaining files are still processed.

    Args:
        experiment_folder (str): Path to experiment folder containing RBC/FBC structure
        parent_sequence (str): Parent sequence for alignment checking
        num_threads (int): Number of threads to use

    Returns:
        dict: Maps each successfully processed file path (str) to a dict with
        the keys ``'total'``, ``'kept'`` and ``'filtered'``. Empty when no
        fastq files are found.
    """
    exp_path = Path(experiment_folder)
    filtered_counts = {}

    # Collect all fastq files
    fastq_files = []
    for rbc_dir in exp_path.glob("RB*"):
        if not rbc_dir.is_dir():
            continue
        for fbc_dir in rbc_dir.glob("NB*"):
            if not fbc_dir.is_dir():
                continue
            fastq_files.extend(fbc_dir.glob("*.fastq"))

    if not fastq_files:
        logging.warning(f"No fastq files found in {experiment_folder}")
        return filtered_counts

    # Prepare parent sequences once
    parent_seq = Seq(parent_sequence)
    parent_rev_comp = parent_seq.reverse_complement()

    # Process files in parallel with progress bar
    with ThreadPoolExecutor(max_workers=num_threads) as executor:
        # Remember which file each future belongs to, so a failing worker
        # can be reported against the right file.
        future_to_file = {
            executor.submit(filter_single_file, (fastq, parent_seq, parent_rev_comp)): str(fastq)
            for fastq in fastq_files
        }

        with tqdm(total=len(fastq_files), desc="Filtering files") as pbar:
            for future in as_completed(future_to_file):
                file_path = future_to_file[future]
                try:
                    _, total, kept, filtered_records = future.result()

                    # Write filtered reads next to the original, then
                    # replace the original with the filtered file.
                    temp_file = Path(file_path).parent / f"temp_{Path(file_path).name}"
                    SeqIO.write(filtered_records, temp_file, "fastq")
                    shutil.move(str(temp_file), file_path)

                    filtered_counts[file_path] = {
                        'total': total,
                        'kept': kept,
                        'filtered': total - kept
                    }

                    logging.info(f"Processed {file_path}: {kept}/{total} reads kept")

                except Exception as e:
                    # Best effort: one bad file must not abort the others.
                    logging.error(f"Error processing file {file_path}: {str(e)}")
                finally:
                    pbar.update(1)

    return filtered_counts
@@ -17,7 +17,7 @@
17
17
 
18
18
  # Import MinION objects
19
19
  from levseq import *
20
-
20
+ from levseq.filter_orientation import filter_demultiplexed_folder
21
21
  # Import external packages
22
22
  import logging
23
23
  from pathlib import Path
@@ -472,8 +472,23 @@ def process_ref_csv(cl_args, tqdm_fn=tqdm.tqdm):
472
472
  file_to_fastq = cat_fastq_files(cl_args.get("path"), output_dir)
473
473
  try:
474
474
  demux_fastq(output_dir, name_folder, barcode_path)
475
+
476
+ # Add filtering step here with multithreading
477
+ filtered_counts = filter_demultiplexed_folder(
478
+ name_folder,
479
+ refseq,
480
+ num_threads=10
481
+ )
482
+ logging.info(f"Orientation filtering completed for {name}")
483
+ total_reads = sum(counts['total'] for counts in filtered_counts.values())
484
+ kept_reads = sum(counts['kept'] for counts in filtered_counts.values())
485
+ logging.info(f"Total filtering results: {kept_reads}/{total_reads} reads kept ({kept_reads/total_reads*100:.2f}%)")
486
+ for file, counts in filtered_counts.items():
487
+ logging.info(f"{file}: {counts['kept']}/{counts['total']} reads kept")
488
+
489
+
475
490
  except Exception as e:
476
- logging.error("An error occurred during demultiplexing for sample {}. Skipping this sample.".format(name), exc_info=True)
491
+ logging.error("An error occurred during demultiplexing/filtering for sample {}. Skipping this sample.".format(name), exc_info=True)
477
492
  continue
478
493
 
479
494
  if not cl_args["skip_variantcalling"]:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: levseq
3
- Version: 1.3.2
3
+ Version: 1.3.3
4
4
  Home-page: https://github.com/fhalab/levseq/
5
5
  Author: Yueming Long, Ariane Mora, Francesca-Zhoufan Li, Emre Gursoy
6
6
  Author-email: ylong@caltech.edu
@@ -87,6 +87,10 @@ conda create --name levseq python=3.12 -y
87
87
  conda activate levseq
88
88
  ```
89
89
 
90
+ ```
91
+ pip install levseq
92
+ ```
93
+
90
94
  #### Dependencies
91
95
 
92
96
  1. Samtools: https://www.htslib.org/download/
@@ -7,6 +7,7 @@ levseq/__init__.py
7
7
  levseq/basecaller.py
8
8
  levseq/cmd.py
9
9
  levseq/coordinates.py
10
+ levseq/filter_orientation.py
10
11
  levseq/globals.py
11
12
  levseq/interface.py
12
13
  levseq/parser.py
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes