levseq 1.2.7__tar.gz → 1.2.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {levseq-1.2.7/levseq.egg-info → levseq-1.2.9}/PKG-INFO +14 -4
- {levseq-1.2.7 → levseq-1.2.9}/README.md +13 -3
- {levseq-1.2.7 → levseq-1.2.9}/levseq/__init__.py +1 -1
- {levseq-1.2.7 → levseq-1.2.9}/levseq/utils.py +12 -7
- {levseq-1.2.7 → levseq-1.2.9}/levseq/variantcaller.py +2 -1
- {levseq-1.2.7 → levseq-1.2.9/levseq.egg-info}/PKG-INFO +14 -4
- {levseq-1.2.7 → levseq-1.2.9}/levseq.egg-info/SOURCES.txt +1 -0
- levseq-1.2.9/tests/test_deploy.py +91 -0
- {levseq-1.2.7 → levseq-1.2.9}/LICENSE +0 -0
- {levseq-1.2.7 → levseq-1.2.9}/MANIFEST.in +0 -0
- {levseq-1.2.7 → levseq-1.2.9}/levseq/IO_processor.py +0 -0
- {levseq-1.2.7 → levseq-1.2.9}/levseq/barcoding/__init__.py +0 -0
- {levseq-1.2.7 → levseq-1.2.9}/levseq/barcoding/demultiplex +0 -0
- {levseq-1.2.7 → levseq-1.2.9}/levseq/barcoding/demultiplex-arm64 +0 -0
- {levseq-1.2.7 → levseq-1.2.9}/levseq/barcoding/demultiplex-x86 +0 -0
- {levseq-1.2.7 → levseq-1.2.9}/levseq/barcoding/minion_barcodes.fasta +0 -0
- {levseq-1.2.7 → levseq-1.2.9}/levseq/basecaller.py +0 -0
- {levseq-1.2.7 → levseq-1.2.9}/levseq/cmd.py +0 -0
- {levseq-1.2.7 → levseq-1.2.9}/levseq/coordinates.py +0 -0
- {levseq-1.2.7 → levseq-1.2.9}/levseq/globals.py +0 -0
- {levseq-1.2.7 → levseq-1.2.9}/levseq/interface.py +0 -0
- {levseq-1.2.7 → levseq-1.2.9}/levseq/parser.py +0 -0
- {levseq-1.2.7 → levseq-1.2.9}/levseq/run_levseq.py +0 -0
- {levseq-1.2.7 → levseq-1.2.9}/levseq/screen.py +0 -0
- {levseq-1.2.7 → levseq-1.2.9}/levseq/seqfit.py +0 -0
- {levseq-1.2.7 → levseq-1.2.9}/levseq/simulation.py +0 -0
- {levseq-1.2.7 → levseq-1.2.9}/levseq/user.py +0 -0
- {levseq-1.2.7 → levseq-1.2.9}/levseq/visualization.py +0 -0
- {levseq-1.2.7 → levseq-1.2.9}/levseq.egg-info/dependency_links.txt +0 -0
- {levseq-1.2.7 → levseq-1.2.9}/levseq.egg-info/entry_points.txt +0 -0
- {levseq-1.2.7 → levseq-1.2.9}/levseq.egg-info/requires.txt +0 -0
- {levseq-1.2.7 → levseq-1.2.9}/levseq.egg-info/top_level.txt +0 -0
- {levseq-1.2.7 → levseq-1.2.9}/setup.cfg +0 -0
- {levseq-1.2.7 → levseq-1.2.9}/setup.py +0 -0
- {levseq-1.2.7 → levseq-1.2.9}/tests/test_demultiplex_docker.py +0 -0
- {levseq-1.2.7 → levseq-1.2.9}/tests/test_opligopools.py +0 -0
- {levseq-1.2.7 → levseq-1.2.9}/tests/test_seqfitvis.py +0 -0
- {levseq-1.2.7 → levseq-1.2.9}/tests/test_seqs.py +0 -0
- {levseq-1.2.7 → levseq-1.2.9}/tests/test_statistics.py +0 -0
- {levseq-1.2.7 → levseq-1.2.9}/tests/test_variant_calling.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: levseq
|
|
3
|
-
Version: 1.2.7
|
|
3
|
+
Version: 1.2.9
|
|
4
4
|
Home-page: https://github.com/fhalab/levseq/
|
|
5
5
|
Author: Yueming Long, Emreay Gursoy, Ariane Mora, Francesca-Zhoufan Li
|
|
6
6
|
Author-email: ylong@caltech.edu
|
|
@@ -49,7 +49,7 @@ Requires-Dist: biopandas
|
|
|
49
49
|
|
|
50
50
|
In directed evolution, sequencing every variant enhances data insight and creates datasets suitable for AI/ML methods. This method is presented as an extension of the original Every Variant Sequencer using Illumina technology. With this approach, sequence variants can be generated within a day at an extremely low cost.
|
|
51
51
|
|
|
52
|
-

|
|
53
53
|
Figure 1: Overview of the LevSeq variant sequencing workflow using Nanopore technology. This diagram illustrates the key steps in the process, from sample preparation to data analysis and visualization.
|
|
54
54
|
|
|
55
55
|
|
|
@@ -60,7 +60,7 @@ Figure 1: Overview of the LevSeq variant sequencing workflow using Nanopore tech
|
|
|
60
60
|
|
|
61
61
|
For setting up the experimental side of LevSeq we suggest the following preparations:
|
|
62
62
|
|
|
63
|
-
- Order forward and reverse primers compatible with the desired plasmid, see methods section of [our paper](
|
|
63
|
+
- Order forward and reverse primers compatible with the desired plasmid, see methods section of [our paper](https://pubs.acs.org/doi/10.1021/acssynbio.4c00625).
|
|
64
64
|
- Successfully install Oxford Nanopore's software (this is only for if you are doing basecalling/minION processing). [Link to installation guide](https://nanoporetech.com/).
|
|
65
65
|
|
|
66
66
|
## How to Use LevSeq
|
|
@@ -171,7 +171,17 @@ For more details or trouble shooting please look at our [computational_protocols
|
|
|
171
171
|
|
|
172
172
|
#### Citing
|
|
173
173
|
|
|
174
|
-
If you have found LevSeq useful, please cite
|
|
174
|
+
If you have found LevSeq useful, please cite our [paper](https://pubs.acs.org/doi/10.1021/acssynbio.4c00625).
|
|
175
|
+
|
|
176
|
+
```bibtex
|
|
177
|
+
@article{long2024levseq,
|
|
178
|
+
title={LevSeq: Rapid Generation of Sequence-Function Data for Directed Evolution and Machine Learning},
|
|
179
|
+
author={Long, Yueming and Mora, Ariane and Li, Francesca-Zhoufan and Gürsoy, Emre and Johnston, Kadina E and Arnold, Frances H},
|
|
180
|
+
journal={ACS Synthetic Biology},
|
|
181
|
+
year={2024},
|
|
182
|
+
publisher={American Chemical Society}
|
|
183
|
+
}
|
|
184
|
+
```
|
|
175
185
|
|
|
176
186
|
#### Contact
|
|
177
187
|
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
In directed evolution, sequencing every variant enhances data insight and creates datasets suitable for AI/ML methods. This method is presented as an extension of the original Every Variant Sequencer using Illumina technology. With this approach, sequence variants can be generated within a day at an extremely low cost.
|
|
4
4
|
|
|
5
|
-

|
|
6
6
|
Figure 1: Overview of the LevSeq variant sequencing workflow using Nanopore technology. This diagram illustrates the key steps in the process, from sample preparation to data analysis and visualization.
|
|
7
7
|
|
|
8
8
|
|
|
@@ -13,7 +13,7 @@ Figure 1: Overview of the LevSeq variant sequencing workflow using Nanopore tech
|
|
|
13
13
|
|
|
14
14
|
For setting up the experimental side of LevSeq we suggest the following preparations:
|
|
15
15
|
|
|
16
|
-
- Order forward and reverse primers compatible with the desired plasmid, see methods section of [our paper](
|
|
16
|
+
- Order forward and reverse primers compatible with the desired plasmid, see methods section of [our paper](https://pubs.acs.org/doi/10.1021/acssynbio.4c00625).
|
|
17
17
|
- Successfully install Oxford Nanopore's software (this is only for if you are doing basecalling/minION processing). [Link to installation guide](https://nanoporetech.com/).
|
|
18
18
|
|
|
19
19
|
## How to Use LevSeq
|
|
@@ -124,7 +124,17 @@ For more details or trouble shooting please look at our [computational_protocols
|
|
|
124
124
|
|
|
125
125
|
#### Citing
|
|
126
126
|
|
|
127
|
-
If you have found LevSeq useful, please cite
|
|
127
|
+
If you have found LevSeq useful, please cite our [paper](https://pubs.acs.org/doi/10.1021/acssynbio.4c00625).
|
|
128
|
+
|
|
129
|
+
```bibtex
|
|
130
|
+
@article{long2024levseq,
|
|
131
|
+
title={LevSeq: Rapid Generation of Sequence-Function Data for Directed Evolution and Machine Learning},
|
|
132
|
+
author={Long, Yueming and Mora, Ariane and Li, Francesca-Zhoufan and Gürsoy, Emre and Johnston, Kadina E and Arnold, Frances H},
|
|
133
|
+
journal={ACS Synthetic Biology},
|
|
134
|
+
year={2024},
|
|
135
|
+
publisher={American Chemical Society}
|
|
136
|
+
}
|
|
137
|
+
```
|
|
128
138
|
|
|
129
139
|
#### Contact
|
|
130
140
|
|
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
__title__ = 'levseq'
|
|
19
19
|
__description__ = 'LevSeq nanopore sequencing'
|
|
20
20
|
__url__ = 'https://github.com/fhalab/levseq/'
|
|
21
|
-
__version__ = '1.2.7'
|
|
21
|
+
__version__ = '1.2.9'
|
|
22
22
|
__author__ = 'Yueming Long, Emreay Gursoy, Ariane Mora, Francesca-Zhoufan Li'
|
|
23
23
|
__author_email__ = 'ylong@caltech.edu'
|
|
24
24
|
__license__ = 'GPL3'
|
|
@@ -214,8 +214,10 @@ def calculate_mutation_significance_across_well(seq_df):
|
|
|
214
214
|
# Do multiple test correction to correct each of the pvalues
|
|
215
215
|
for p in ['p_value', 'p(a)', 'p(t)', 'p(g)', 'p(c)', 'p(n)', 'p(i)']:
|
|
216
216
|
# Do B.H which is the simplest possibly change to have alpha be a variable! ToDo :D
|
|
217
|
-
padjs =
|
|
218
|
-
|
|
217
|
+
padjs = seq_df[p].values * len(seq_df)
|
|
218
|
+
# The multiple test correction was sometimes returning 0 so we're updating to just do bonferroni
|
|
219
|
+
#multipletests(seq_df[p].values, alpha=0.05, method='fdr_bh')
|
|
220
|
+
seq_df[f'{p} adj.'] = padjs #padjs[1]
|
|
219
221
|
return seq_df
|
|
220
222
|
|
|
221
223
|
def alignment_from_cigar(cigar: str, alignment: str, ref: str, query_qualities: list):
|
|
@@ -246,8 +248,8 @@ def alignment_from_cigar(cigar: str, alignment: str, ref: str, query_qualities:
|
|
|
246
248
|
pos += op_len
|
|
247
249
|
ref_pos += op_len
|
|
248
250
|
elif op == 1: # insertion to the reference
|
|
249
|
-
inserts[
|
|
250
|
-
|
|
251
|
+
inserts[ref_pos - 1] = alignment[pos - 1:pos + op_len]
|
|
252
|
+
new_seq = new_seq[:-1] + 'I' # Set the previous position to be an insertion
|
|
251
253
|
pos += op_len
|
|
252
254
|
elif op == 2: # deletion from the reference
|
|
253
255
|
new_seq += '-' * op_len
|
|
@@ -487,12 +489,15 @@ def get_variant_label_for_well(seq_df, threshold):
|
|
|
487
489
|
label = '_'.join(label)
|
|
488
490
|
# Only keep the frequency of the most frequent mutation
|
|
489
491
|
probability = np.mean([x for x in non_refs['percent_most_freq_mutation'].values])
|
|
490
|
-
# Combine the values
|
|
491
|
-
|
|
492
|
+
# Combine the values -> looks like fishers works maybe only if there are > 1
|
|
493
|
+
if len(non_refs) > 1:
|
|
494
|
+
chi2_statistic, combined_p_value = combine_pvalues([x for x in non_refs['p_value adj.'].values], method='fisher')
|
|
495
|
+
else:
|
|
496
|
+
combined_p_value = non_refs['p_value adj.'].values[0]
|
|
492
497
|
else:
|
|
493
498
|
label = '#PARENT#'
|
|
494
499
|
probability = np.mean([1 - x for x in non_refs['freq_non_ref'].values])
|
|
495
500
|
combined_p_value = float("nan")
|
|
496
501
|
# Return also the mean mutation rate for the well
|
|
497
502
|
mean_mutation_rate = np.mean([1 - x for x in non_refs['freq_non_ref'].values])
|
|
498
|
-
return label, probability, combined_p_value, mixed_well, mean_mutation_rate
|
|
503
|
+
return label, probability, combined_p_value, mixed_well, mean_mutation_rate
|
|
@@ -27,6 +27,7 @@ from Bio import SeqIO
|
|
|
27
27
|
import re
|
|
28
28
|
from tqdm import tqdm
|
|
29
29
|
import warnings
|
|
30
|
+
import math
|
|
30
31
|
'''
|
|
31
32
|
Script for variant calling
|
|
32
33
|
|
|
@@ -213,7 +214,7 @@ class VariantCaller:
|
|
|
213
214
|
self.variant_df['Variant'] = [self.variant_dict[b_id].get('Variant') for b_id in self.variant_df['ID'].values]
|
|
214
215
|
self.variant_df['Mixed Well'] = [self.variant_dict[b_id].get('Mixed Well') for b_id in self.variant_df['ID'].values]
|
|
215
216
|
self.variant_df['Average mutation frequency'] = [self.variant_dict[b_id].get('Average mutation frequency') for b_id in self.variant_df['ID'].values]
|
|
216
|
-
self.variant_df['P value'] = [self.variant_dict[b_id].get('P value')
|
|
217
|
+
self.variant_df['P value'] = [self.variant_dict[b_id].get('P value') for b_id in self.variant_df['ID'].values]
|
|
217
218
|
self.variant_df['Alignment Count'] = [self.variant_dict[b_id].get('Alignment Count') for b_id in self.variant_df['ID'].values]
|
|
218
219
|
self.variant_df['Average error rate'] = [self.variant_dict[b_id].get('Average error rate') for b_id in self.variant_df['ID'].values]
|
|
219
220
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: levseq
|
|
3
|
-
Version: 1.2.7
|
|
3
|
+
Version: 1.2.9
|
|
4
4
|
Home-page: https://github.com/fhalab/levseq/
|
|
5
5
|
Author: Yueming Long, Emreay Gursoy, Ariane Mora, Francesca-Zhoufan Li
|
|
6
6
|
Author-email: ylong@caltech.edu
|
|
@@ -49,7 +49,7 @@ Requires-Dist: biopandas
|
|
|
49
49
|
|
|
50
50
|
In directed evolution, sequencing every variant enhances data insight and creates datasets suitable for AI/ML methods. This method is presented as an extension of the original Every Variant Sequencer using Illumina technology. With this approach, sequence variants can be generated within a day at an extremely low cost.
|
|
51
51
|
|
|
52
|
-

|
|
53
53
|
Figure 1: Overview of the LevSeq variant sequencing workflow using Nanopore technology. This diagram illustrates the key steps in the process, from sample preparation to data analysis and visualization.
|
|
54
54
|
|
|
55
55
|
|
|
@@ -60,7 +60,7 @@ Figure 1: Overview of the LevSeq variant sequencing workflow using Nanopore tech
|
|
|
60
60
|
|
|
61
61
|
For setting up the experimental side of LevSeq we suggest the following preparations:
|
|
62
62
|
|
|
63
|
-
- Order forward and reverse primers compatible with the desired plasmid, see methods section of [our paper](
|
|
63
|
+
- Order forward and reverse primers compatible with the desired plasmid, see methods section of [our paper](https://pubs.acs.org/doi/10.1021/acssynbio.4c00625).
|
|
64
64
|
- Successfully install Oxford Nanopore's software (this is only for if you are doing basecalling/minION processing). [Link to installation guide](https://nanoporetech.com/).
|
|
65
65
|
|
|
66
66
|
## How to Use LevSeq
|
|
@@ -171,7 +171,17 @@ For more details or trouble shooting please look at our [computational_protocols
|
|
|
171
171
|
|
|
172
172
|
#### Citing
|
|
173
173
|
|
|
174
|
-
If you have found LevSeq useful, please cite
|
|
174
|
+
If you have found LevSeq useful, please cite our [paper](https://pubs.acs.org/doi/10.1021/acssynbio.4c00625).
|
|
175
|
+
|
|
176
|
+
```bibtex
|
|
177
|
+
@article{long2024levseq,
|
|
178
|
+
title={LevSeq: Rapid Generation of Sequence-Function Data for Directed Evolution and Machine Learning},
|
|
179
|
+
author={Long, Yueming and Mora, Ariane and Li, Francesca-Zhoufan and Gürsoy, Emre and Johnston, Kadina E and Arnold, Frances H},
|
|
180
|
+
journal={ACS Synthetic Biology},
|
|
181
|
+
year={2024},
|
|
182
|
+
publisher={American Chemical Society}
|
|
183
|
+
}
|
|
184
|
+
```
|
|
175
185
|
|
|
176
186
|
#### Contact
|
|
177
187
|
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
###############################################################################
|
|
2
|
+
# #
|
|
3
|
+
# This program is free software: you can redistribute it and/or modify #
|
|
4
|
+
# it under the terms of the GNU General Public License as published by #
|
|
5
|
+
# the Free Software Foundation, either version 3 of the License, or #
|
|
6
|
+
# (at your option) any later version. #
|
|
7
|
+
# #
|
|
8
|
+
# This program is distributed in the hope that it will be useful, #
|
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
11
|
+
# GNU General Public License for more details. #
|
|
12
|
+
# #
|
|
13
|
+
# You should have received a copy of the GNU General Public License #
|
|
14
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>. #
|
|
15
|
+
# #
|
|
16
|
+
###############################################################################
|
|
17
|
+
|
|
18
|
+
import shutil
|
|
19
|
+
import tempfile
|
|
20
|
+
import unittest
|
|
21
|
+
import matplotlib.pyplot as plt
|
|
22
|
+
from levseq import *
|
|
23
|
+
from levseq.run_levseq import process_ref_csv
|
|
24
|
+
u = SciUtil()
|
|
25
|
+
import math
|
|
26
|
+
|
|
27
|
+
class TestClass(unittest.TestCase):
|
|
28
|
+
|
|
29
|
+
@classmethod
|
|
30
|
+
def setup_class(self):
|
|
31
|
+
local = True
|
|
32
|
+
# Create a base object since it will be the same for all the tests
|
|
33
|
+
THIS_DIR = os.path.dirname(os.path.abspath(__file__))
|
|
34
|
+
|
|
35
|
+
self.data_dir = os.path.join(THIS_DIR, 'test_data/')
|
|
36
|
+
if local:
|
|
37
|
+
self.tmp_dir = os.path.join(THIS_DIR, 'test_data/tmp/')
|
|
38
|
+
if os.path.exists(self.tmp_dir):
|
|
39
|
+
shutil.rmtree(self.tmp_dir)
|
|
40
|
+
os.mkdir(self.tmp_dir)
|
|
41
|
+
else:
|
|
42
|
+
self.tmp_dir = tempfile.mkdtemp(prefix='test_data')
|
|
43
|
+
|
|
44
|
+
@classmethod
|
|
45
|
+
def teardown_class(self):
|
|
46
|
+
shutil.rmtree(self.tmp_dir)
|
|
47
|
+
|
|
48
|
+
class TestDeploy(TestClass):
|
|
49
|
+
|
|
50
|
+
def test_deploy(self):
|
|
51
|
+
cmd_list = [
|
|
52
|
+
'docker', # Needs to be installed as vina.
|
|
53
|
+
'run',
|
|
54
|
+
'--rm',
|
|
55
|
+
'-v',
|
|
56
|
+
f'{os.getcwd()}:/levseq_results',
|
|
57
|
+
'levseq',
|
|
58
|
+
'test_deploy',
|
|
59
|
+
'test_data/laragen_run/levseq-1.2.7/',
|
|
60
|
+
'test_data/laragen_run/20241116-LevSeq-Review-Validation-levseq_ref.csv'
|
|
61
|
+
]
|
|
62
|
+
# ToDo: add in scoring function for ad4
|
|
63
|
+
cmd_return = subprocess.run(cmd_list, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
|
|
64
|
+
print(cmd_return.stdout, cmd_return)
|
|
65
|
+
|
|
66
|
+
def test_variant_calling(self):
|
|
67
|
+
# Take as input the demultiplexed fastq files and the reference csv file
|
|
68
|
+
cl_args = {'skip_demultiplexing': True, 'skip_variantcalling': False}
|
|
69
|
+
cl_args["name"] = 'test_deploy'
|
|
70
|
+
cl_args['path'] = 'test_data/laragen_run/levseq-1.2.7/'
|
|
71
|
+
cl_args["summary"] = 'test_data/laragen_run/20241116-LevSeq-Review-Validation-levseq_ref.csv'
|
|
72
|
+
variant_df, ref_df = process_ref_csv(cl_args)
|
|
73
|
+
# Now we want to check all the variants are the same as in the original case:
|
|
74
|
+
checked_variants_df = pd.read_csv('test_data/laragen_run/levseq-1.2.7/variants_gold_standard.csv')
|
|
75
|
+
checked_variants = checked_variants_df['Variant'].values
|
|
76
|
+
checked_sig = checked_variants_df['P adj. value'].values
|
|
77
|
+
i = 0
|
|
78
|
+
for variant, pval in variant_df[['Variant', 'P adj. value']].values:
|
|
79
|
+
print(variant, checked_variants[i])
|
|
80
|
+
if checked_variants[i]:
|
|
81
|
+
if variant:
|
|
82
|
+
assert variant == checked_variants[i]
|
|
83
|
+
# if pval < 0.05:
|
|
84
|
+
# assert checked_sig[i] < 0.05
|
|
85
|
+
# elif math.isnan(pval):
|
|
86
|
+
# assert math.isnan(checked_sig[i])
|
|
87
|
+
# else:
|
|
88
|
+
# assert checked_sig[i] >= 0.05
|
|
89
|
+
print(pval, checked_sig[i])
|
|
90
|
+
i += 1
|
|
91
|
+
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|