levseq 1.2.6__tar.gz → 1.2.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {levseq-1.2.6/levseq.egg-info → levseq-1.2.9}/PKG-INFO +19 -5
- {levseq-1.2.6 → levseq-1.2.9}/README.md +18 -4
- {levseq-1.2.6 → levseq-1.2.9}/levseq/__init__.py +1 -1
- {levseq-1.2.6 → levseq-1.2.9}/levseq/run_levseq.py +18 -10
- {levseq-1.2.6 → levseq-1.2.9}/levseq/utils.py +12 -7
- {levseq-1.2.6 → levseq-1.2.9}/levseq/variantcaller.py +7 -5
- {levseq-1.2.6 → levseq-1.2.9/levseq.egg-info}/PKG-INFO +19 -5
- {levseq-1.2.6 → levseq-1.2.9}/levseq.egg-info/SOURCES.txt +1 -0
- levseq-1.2.9/tests/test_deploy.py +91 -0
- {levseq-1.2.6 → levseq-1.2.9}/LICENSE +0 -0
- {levseq-1.2.6 → levseq-1.2.9}/MANIFEST.in +0 -0
- {levseq-1.2.6 → levseq-1.2.9}/levseq/IO_processor.py +0 -0
- {levseq-1.2.6 → levseq-1.2.9}/levseq/barcoding/__init__.py +0 -0
- {levseq-1.2.6 → levseq-1.2.9}/levseq/barcoding/demultiplex +0 -0
- {levseq-1.2.6 → levseq-1.2.9}/levseq/barcoding/demultiplex-arm64 +0 -0
- {levseq-1.2.6 → levseq-1.2.9}/levseq/barcoding/demultiplex-x86 +0 -0
- {levseq-1.2.6 → levseq-1.2.9}/levseq/barcoding/minion_barcodes.fasta +0 -0
- {levseq-1.2.6 → levseq-1.2.9}/levseq/basecaller.py +0 -0
- {levseq-1.2.6 → levseq-1.2.9}/levseq/cmd.py +0 -0
- {levseq-1.2.6 → levseq-1.2.9}/levseq/coordinates.py +0 -0
- {levseq-1.2.6 → levseq-1.2.9}/levseq/globals.py +0 -0
- {levseq-1.2.6 → levseq-1.2.9}/levseq/interface.py +0 -0
- {levseq-1.2.6 → levseq-1.2.9}/levseq/parser.py +0 -0
- {levseq-1.2.6 → levseq-1.2.9}/levseq/screen.py +0 -0
- {levseq-1.2.6 → levseq-1.2.9}/levseq/seqfit.py +0 -0
- {levseq-1.2.6 → levseq-1.2.9}/levseq/simulation.py +0 -0
- {levseq-1.2.6 → levseq-1.2.9}/levseq/user.py +0 -0
- {levseq-1.2.6 → levseq-1.2.9}/levseq/visualization.py +0 -0
- {levseq-1.2.6 → levseq-1.2.9}/levseq.egg-info/dependency_links.txt +0 -0
- {levseq-1.2.6 → levseq-1.2.9}/levseq.egg-info/entry_points.txt +0 -0
- {levseq-1.2.6 → levseq-1.2.9}/levseq.egg-info/requires.txt +0 -0
- {levseq-1.2.6 → levseq-1.2.9}/levseq.egg-info/top_level.txt +0 -0
- {levseq-1.2.6 → levseq-1.2.9}/setup.cfg +0 -0
- {levseq-1.2.6 → levseq-1.2.9}/setup.py +0 -0
- {levseq-1.2.6 → levseq-1.2.9}/tests/test_demultiplex_docker.py +0 -0
- {levseq-1.2.6 → levseq-1.2.9}/tests/test_opligopools.py +0 -0
- {levseq-1.2.6 → levseq-1.2.9}/tests/test_seqfitvis.py +0 -0
- {levseq-1.2.6 → levseq-1.2.9}/tests/test_seqs.py +0 -0
- {levseq-1.2.6 → levseq-1.2.9}/tests/test_statistics.py +0 -0
- {levseq-1.2.6 → levseq-1.2.9}/tests/test_variant_calling.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: levseq
|
|
3
|
-
Version: 1.2.6
|
|
3
|
+
Version: 1.2.9
|
|
4
4
|
Home-page: https://github.com/fhalab/levseq/
|
|
5
5
|
Author: Yueming Long, Emreay Gursoy, Ariane Mora, Francesca-Zhoufan Li
|
|
6
6
|
Author-email: ylong@caltech.edu
|
|
@@ -49,18 +49,18 @@ Requires-Dist: biopandas
|
|
|
49
49
|
|
|
50
50
|
In directed evolution, sequencing every variant enhances data insight and creates datasets suitable for AI/ML methods. This method is presented as an extension of the original Every Variant Sequencer using Illumina technology. With this approach, sequence variants can be generated within a day at an extremely low cost.
|
|
51
51
|
|
|
52
|
-

|
|
53
53
|
Figure 1: Overview of the LevSeq variant sequencing workflow using Nanopore technology. This diagram illustrates the key steps in the process, from sample preparation to data analysis and visualization.
|
|
54
54
|
|
|
55
55
|
|
|
56
56
|
- Data to reproduce the results and to test are available on zenodo [](https://doi.org/10.5281/zenodo.13694463)
|
|
57
|
-
- A dockerized website and database for labs to locally host and visualize their data: website is available [here](https://
|
|
57
|
+
- A dockerized website and database for labs to locally host and visualize their data: website is available [here](https://levseqdb.streamlit.app/) and code to host locally [here](https://github.com/fhalab/LevSeq_db)
|
|
58
58
|
|
|
59
59
|
## Setup
|
|
60
60
|
|
|
61
61
|
For setting up the experimental side of LevSeq we suggest the following preparations:
|
|
62
62
|
|
|
63
|
-
- Order forward and reverse primers compatible with the desired plasmid, see methods section of [our paper](
|
|
63
|
+
- Order forward and reverse primers compatible with the desired plasmid, see methods section of [our paper](https://pubs.acs.org/doi/10.1021/acssynbio.4c00625).
|
|
64
64
|
- Successfully install Oxford Nanopore's software (this is only for if you are doing basecalling/minION processing). [Link to installation guide](https://nanoporetech.com/).
|
|
65
65
|
|
|
66
66
|
## How to Use LevSeq
|
|
@@ -171,4 +171,18 @@ For more details or trouble shooting please look at our [computational_protocols
|
|
|
171
171
|
|
|
172
172
|
#### Citing
|
|
173
173
|
|
|
174
|
-
If you have found LevSeq useful, please cite
|
|
174
|
+
If you have found LevSeq useful, please cite our [paper](https://pubs.acs.org/doi/10.1021/acssynbio.4c00625).
|
|
175
|
+
|
|
176
|
+
```bibtex
|
|
177
|
+
@article{long2024levseq,
|
|
178
|
+
title={LevSeq: Rapid Generation of Sequence-Function Data for Directed Evolution and Machine Learning},
|
|
179
|
+
author={Long, Yueming and Mora, Ariane and Li, Francesca-Zhoufan and Gürsoy, Emre and Johnston, Kadina E and Arnold, Frances H},
|
|
180
|
+
journal={ACS Synthetic Biology},
|
|
181
|
+
year={2024},
|
|
182
|
+
publisher={American Chemical Society}
|
|
183
|
+
}
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
#### Contact
|
|
187
|
+
|
|
188
|
+
Leave a feature request in the issues or reach us via [email](mailto:levseqdb@gmail.com).
|
|
@@ -2,18 +2,18 @@
|
|
|
2
2
|
|
|
3
3
|
In directed evolution, sequencing every variant enhances data insight and creates datasets suitable for AI/ML methods. This method is presented as an extension of the original Every Variant Sequencer using Illumina technology. With this approach, sequence variants can be generated within a day at an extremely low cost.
|
|
4
4
|
|
|
5
|
-

|
|
6
6
|
Figure 1: Overview of the LevSeq variant sequencing workflow using Nanopore technology. This diagram illustrates the key steps in the process, from sample preparation to data analysis and visualization.
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
- Data to reproduce the results and to test are available on zenodo [](https://doi.org/10.5281/zenodo.13694463)
|
|
10
|
-
- A dockerized website and database for labs to locally host and visualize their data: website is available [here](https://
|
|
10
|
+
- A dockerized website and database for labs to locally host and visualize their data: website is available [here](https://levseqdb.streamlit.app/) and code to host locally [here](https://github.com/fhalab/LevSeq_db)
|
|
11
11
|
|
|
12
12
|
## Setup
|
|
13
13
|
|
|
14
14
|
For setting up the experimental side of LevSeq we suggest the following preparations:
|
|
15
15
|
|
|
16
|
-
- Order forward and reverse primers compatible with the desired plasmid, see methods section of [our paper](
|
|
16
|
+
- Order forward and reverse primers compatible with the desired plasmid, see methods section of [our paper](https://pubs.acs.org/doi/10.1021/acssynbio.4c00625).
|
|
17
17
|
- Successfully install Oxford Nanopore's software (this is only for if you are doing basecalling/minION processing). [Link to installation guide](https://nanoporetech.com/).
|
|
18
18
|
|
|
19
19
|
## How to Use LevSeq
|
|
@@ -124,4 +124,18 @@ For more details or trouble shooting please look at our [computational_protocols
|
|
|
124
124
|
|
|
125
125
|
#### Citing
|
|
126
126
|
|
|
127
|
-
If you have found LevSeq useful, please cite
|
|
127
|
+
If you have found LevSeq useful, please cite our [paper](https://pubs.acs.org/doi/10.1021/acssynbio.4c00625).
|
|
128
|
+
|
|
129
|
+
```bibtex
|
|
130
|
+
@article{long2024levseq,
|
|
131
|
+
title={LevSeq: Rapid Generation of Sequence-Function Data for Directed Evolution and Machine Learning},
|
|
132
|
+
author={Long, Yueming and Mora, Ariane and Li, Francesca-Zhoufan and Gürsoy, Emre and Johnston, Kadina E and Arnold, Frances H},
|
|
133
|
+
journal={ACS Synthetic Biology},
|
|
134
|
+
year={2024},
|
|
135
|
+
publisher={American Chemical Society}
|
|
136
|
+
}
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
#### Contact
|
|
140
|
+
|
|
141
|
+
Leave a feature request in the issues or reach us via [email](mailto:levseqdb@gmail.com).
|
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
__title__ = 'levseq'
|
|
19
19
|
__description__ = 'LevSeq nanopore sequencing'
|
|
20
20
|
__url__ = 'https://github.com/fhalab/levseq/'
|
|
21
|
-
__version__ = '1.2.6'
|
|
21
|
+
__version__ = '1.2.9'
|
|
22
22
|
__author__ = 'Yueming Long, Emreay Gursoy, Ariane Mora, Francesca-Zhoufan Li'
|
|
23
23
|
__author_email__ = 'ylong@caltech.edu'
|
|
24
24
|
__license__ = 'GPL3'
|
|
@@ -275,11 +275,11 @@ def create_df_v(variants_df):
|
|
|
275
275
|
)
|
|
276
276
|
# Fill in 'Deletion' in 'aa_variant' column
|
|
277
277
|
df_variants_.loc[
|
|
278
|
-
df_variants_["nc_variant"] == "
|
|
279
|
-
] = "
|
|
278
|
+
df_variants_["nc_variant"] == "#DEL#", "aa_variant"
|
|
279
|
+
] = "#DEL#"
|
|
280
280
|
df_variants_.loc[
|
|
281
|
-
df_variants_["nc_variant"] == "
|
|
282
|
-
] = "
|
|
281
|
+
df_variants_["nc_variant"] == "#INS#", "aa_variant"
|
|
282
|
+
] = "#INS#"
|
|
283
283
|
|
|
284
284
|
# Compare aa_variant with translated refseq and generate Substitutions column
|
|
285
285
|
df_variants_["Substitutions"] = df_variants_.apply(get_mutations, axis=1)
|
|
@@ -291,7 +291,7 @@ def create_df_v(variants_df):
|
|
|
291
291
|
# Fill in Deletion into Substitutions Column, keep #N.A.# unchanged
|
|
292
292
|
for i in df_variants_.index:
|
|
293
293
|
if df_variants_["nc_variant"].iloc[i] == "Deletion":
|
|
294
|
-
df_variants_.Substitutions.iat[i] = df_variants_.Substitutions.iat[i].replace("", "
|
|
294
|
+
df_variants_.Substitutions.iat[i] = df_variants_.Substitutions.iat[i].replace("", "#DEL#")
|
|
295
295
|
elif df_variants_["nc_variant"].iloc[i] == "#N.A.#":
|
|
296
296
|
df_variants_.Substitutions.iat[i] = "#N.A.#"
|
|
297
297
|
|
|
@@ -363,9 +363,9 @@ def create_nc_variant(variant, refseq):
|
|
|
363
363
|
elif variant == "#PARENT#":
|
|
364
364
|
return refseq
|
|
365
365
|
elif "DEL" in variant:
|
|
366
|
-
return "
|
|
366
|
+
return "#DEL#"
|
|
367
367
|
elif variant == '+':
|
|
368
|
-
return "
|
|
368
|
+
return "#INS#"
|
|
369
369
|
else:
|
|
370
370
|
mutations = variant.split("_")
|
|
371
371
|
nc_variant = list(refseq)
|
|
@@ -465,7 +465,7 @@ def process_ref_csv(cl_args, tqdm_fn=tqdm.tqdm):
|
|
|
465
465
|
logging.info(f"Fasta file for {name} already exists. Skipping write.")
|
|
466
466
|
|
|
467
467
|
barcode_path = filter_bc(cl_args, name_folder, i)
|
|
468
|
-
output_dir = Path(result_folder) / "
|
|
468
|
+
output_dir = Path(result_folder) / f"{cl_args['name']}_fastq"
|
|
469
469
|
output_dir.mkdir(parents=True, exist_ok=True)
|
|
470
470
|
|
|
471
471
|
if not cl_args["skip_demultiplexing"]:
|
|
@@ -491,17 +491,25 @@ def process_ref_csv(cl_args, tqdm_fn=tqdm.tqdm):
|
|
|
491
491
|
continue
|
|
492
492
|
|
|
493
493
|
variant_df.to_csv(variant_csv_path, index=False)
|
|
494
|
-
return variant_df
|
|
494
|
+
return variant_df, ref_df
|
|
495
495
|
|
|
496
496
|
# Main function to run LevSeq and ensure saving of intermediate results if an error occurs
|
|
497
497
|
def run_LevSeq(cl_args, tqdm_fn=tqdm.tqdm):
|
|
498
498
|
result_folder = create_result_folder(cl_args)
|
|
499
|
+
# Ref folder for saving ref csv file
|
|
500
|
+
ref_folder = os.path.join(result_folder, "ref")
|
|
501
|
+
os.makedirs(ref_folder, exist_ok=True)
|
|
502
|
+
|
|
499
503
|
configure_logging(result_folder)
|
|
504
|
+
logging.info("Logging configured. Starting program.")
|
|
500
505
|
|
|
501
506
|
variant_df = pd.DataFrame(columns=["barcode_plate", "name", "refseq", "variant"])
|
|
502
507
|
|
|
503
508
|
try:
|
|
504
|
-
variant_df = process_ref_csv(cl_args, tqdm_fn)
|
|
509
|
+
variant_df, ref_df = process_ref_csv(cl_args, tqdm_fn)
|
|
510
|
+
ref_df_path = os.path.join(ref_folder, cl_args["name"]+".csv")
|
|
511
|
+
ref_df.to_csv(ref_df_path, index=False)
|
|
512
|
+
|
|
505
513
|
if variant_df.empty:
|
|
506
514
|
logging.warning("No data found during CSV processing. The CSV is empty.")
|
|
507
515
|
except Exception as e:
|
|
@@ -214,8 +214,10 @@ def calculate_mutation_significance_across_well(seq_df):
|
|
|
214
214
|
# Do multiple test correction to correct each of the pvalues
|
|
215
215
|
for p in ['p_value', 'p(a)', 'p(t)', 'p(g)', 'p(c)', 'p(n)', 'p(i)']:
|
|
216
216
|
# Do B.H which is the simplest possibly change to have alpha be a variable! ToDo :D
|
|
217
|
-
padjs =
|
|
218
|
-
|
|
217
|
+
padjs = seq_df[p].values * len(seq_df)
|
|
218
|
+
# The multiple test correction was sometimes returning 0 so we're updating to just do bonferroni
|
|
219
|
+
#multipletests(seq_df[p].values, alpha=0.05, method='fdr_bh')
|
|
220
|
+
seq_df[f'{p} adj.'] = padjs #padjs[1]
|
|
219
221
|
return seq_df
|
|
220
222
|
|
|
221
223
|
def alignment_from_cigar(cigar: str, alignment: str, ref: str, query_qualities: list):
|
|
@@ -246,8 +248,8 @@ def alignment_from_cigar(cigar: str, alignment: str, ref: str, query_qualities:
|
|
|
246
248
|
pos += op_len
|
|
247
249
|
ref_pos += op_len
|
|
248
250
|
elif op == 1: # insertion to the reference
|
|
249
|
-
inserts[
|
|
250
|
-
|
|
251
|
+
inserts[ref_pos - 1] = alignment[pos - 1:pos + op_len]
|
|
252
|
+
new_seq = new_seq[:-1] + 'I' # Set the previous position to be an insertion
|
|
251
253
|
pos += op_len
|
|
252
254
|
elif op == 2: # deletion from the reference
|
|
253
255
|
new_seq += '-' * op_len
|
|
@@ -487,12 +489,15 @@ def get_variant_label_for_well(seq_df, threshold):
|
|
|
487
489
|
label = '_'.join(label)
|
|
488
490
|
# Only keep the frequency of the most frequent mutation
|
|
489
491
|
probability = np.mean([x for x in non_refs['percent_most_freq_mutation'].values])
|
|
490
|
-
# Combine the values
|
|
491
|
-
|
|
492
|
+
# Combine the values -> looks like fishers works maybe only if there are > 1
|
|
493
|
+
if len(non_refs) > 1:
|
|
494
|
+
chi2_statistic, combined_p_value = combine_pvalues([x for x in non_refs['p_value adj.'].values], method='fisher')
|
|
495
|
+
else:
|
|
496
|
+
combined_p_value = non_refs['p_value adj.'].values[0]
|
|
492
497
|
else:
|
|
493
498
|
label = '#PARENT#'
|
|
494
499
|
probability = np.mean([1 - x for x in non_refs['freq_non_ref'].values])
|
|
495
500
|
combined_p_value = float("nan")
|
|
496
501
|
# Return also the mean mutation rate for the well
|
|
497
502
|
mean_mutation_rate = np.mean([1 - x for x in non_refs['freq_non_ref'].values])
|
|
498
|
-
return label, probability, combined_p_value, mixed_well, mean_mutation_rate
|
|
503
|
+
return label, probability, combined_p_value, mixed_well, mean_mutation_rate
|
|
@@ -27,6 +27,7 @@ from Bio import SeqIO
|
|
|
27
27
|
import re
|
|
28
28
|
from tqdm import tqdm
|
|
29
29
|
import warnings
|
|
30
|
+
import math
|
|
30
31
|
'''
|
|
31
32
|
Script for variant calling
|
|
32
33
|
|
|
@@ -37,11 +38,12 @@ The variant caller starts from demultiplexed fastq files.
|
|
|
37
38
|
3) Call variant with soft alignment
|
|
38
39
|
|
|
39
40
|
'''
|
|
40
|
-
|
|
41
|
-
logging.basicConfig(level=logging.WARNING, format='%(asctime)s - %(levelname)s - %(message)s')
|
|
41
|
+
|
|
42
42
|
logger = logging.getLogger(__name__)
|
|
43
|
-
#
|
|
44
|
-
|
|
43
|
+
logger.setLevel(logging.WARNING) # Set default level for this module
|
|
44
|
+
# Use the logger in this file
|
|
45
|
+
logger.warning("This is a warning message.")
|
|
46
|
+
logger.info("This won't show unless logging is configured to INFO elsewhere.")
|
|
45
47
|
|
|
46
48
|
class VariantCaller:
|
|
47
49
|
"""
|
|
@@ -212,7 +214,7 @@ class VariantCaller:
|
|
|
212
214
|
self.variant_df['Variant'] = [self.variant_dict[b_id].get('Variant') for b_id in self.variant_df['ID'].values]
|
|
213
215
|
self.variant_df['Mixed Well'] = [self.variant_dict[b_id].get('Mixed Well') for b_id in self.variant_df['ID'].values]
|
|
214
216
|
self.variant_df['Average mutation frequency'] = [self.variant_dict[b_id].get('Average mutation frequency') for b_id in self.variant_df['ID'].values]
|
|
215
|
-
self.variant_df['P value'] = [self.variant_dict[b_id].get('P value')
|
|
217
|
+
self.variant_df['P value'] = [self.variant_dict[b_id].get('P value') for b_id in self.variant_df['ID'].values]
|
|
216
218
|
self.variant_df['Alignment Count'] = [self.variant_dict[b_id].get('Alignment Count') for b_id in self.variant_df['ID'].values]
|
|
217
219
|
self.variant_df['Average error rate'] = [self.variant_dict[b_id].get('Average error rate') for b_id in self.variant_df['ID'].values]
|
|
218
220
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: levseq
|
|
3
|
-
Version: 1.2.6
|
|
3
|
+
Version: 1.2.9
|
|
4
4
|
Home-page: https://github.com/fhalab/levseq/
|
|
5
5
|
Author: Yueming Long, Emreay Gursoy, Ariane Mora, Francesca-Zhoufan Li
|
|
6
6
|
Author-email: ylong@caltech.edu
|
|
@@ -49,18 +49,18 @@ Requires-Dist: biopandas
|
|
|
49
49
|
|
|
50
50
|
In directed evolution, sequencing every variant enhances data insight and creates datasets suitable for AI/ML methods. This method is presented as an extension of the original Every Variant Sequencer using Illumina technology. With this approach, sequence variants can be generated within a day at an extremely low cost.
|
|
51
51
|
|
|
52
|
-

|
|
53
53
|
Figure 1: Overview of the LevSeq variant sequencing workflow using Nanopore technology. This diagram illustrates the key steps in the process, from sample preparation to data analysis and visualization.
|
|
54
54
|
|
|
55
55
|
|
|
56
56
|
- Data to reproduce the results and to test are available on zenodo [](https://doi.org/10.5281/zenodo.13694463)
|
|
57
|
-
- A dockerized website and database for labs to locally host and visualize their data: website is available [here](https://
|
|
57
|
+
- A dockerized website and database for labs to locally host and visualize their data: website is available [here](https://levseqdb.streamlit.app/) and code to host locally [here](https://github.com/fhalab/LevSeq_db)
|
|
58
58
|
|
|
59
59
|
## Setup
|
|
60
60
|
|
|
61
61
|
For setting up the experimental side of LevSeq we suggest the following preparations:
|
|
62
62
|
|
|
63
|
-
- Order forward and reverse primers compatible with the desired plasmid, see methods section of [our paper](
|
|
63
|
+
- Order forward and reverse primers compatible with the desired plasmid, see methods section of [our paper](https://pubs.acs.org/doi/10.1021/acssynbio.4c00625).
|
|
64
64
|
- Successfully install Oxford Nanopore's software (this is only for if you are doing basecalling/minION processing). [Link to installation guide](https://nanoporetech.com/).
|
|
65
65
|
|
|
66
66
|
## How to Use LevSeq
|
|
@@ -171,4 +171,18 @@ For more details or trouble shooting please look at our [computational_protocols
|
|
|
171
171
|
|
|
172
172
|
#### Citing
|
|
173
173
|
|
|
174
|
-
If you have found LevSeq useful, please cite
|
|
174
|
+
If you have found LevSeq useful, please cite our [paper](https://pubs.acs.org/doi/10.1021/acssynbio.4c00625).
|
|
175
|
+
|
|
176
|
+
```bibtex
|
|
177
|
+
@article{long2024levseq,
|
|
178
|
+
title={LevSeq: Rapid Generation of Sequence-Function Data for Directed Evolution and Machine Learning},
|
|
179
|
+
author={Long, Yueming and Mora, Ariane and Li, Francesca-Zhoufan and Gürsoy, Emre and Johnston, Kadina E and Arnold, Frances H},
|
|
180
|
+
journal={ACS Synthetic Biology},
|
|
181
|
+
year={2024},
|
|
182
|
+
publisher={American Chemical Society}
|
|
183
|
+
}
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
#### Contact
|
|
187
|
+
|
|
188
|
+
Leave a feature request in the issues or reach us via [email](mailto:levseqdb@gmail.com).
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
###############################################################################
|
|
2
|
+
# #
|
|
3
|
+
# This program is free software: you can redistribute it and/or modify #
|
|
4
|
+
# it under the terms of the GNU General Public License as published by #
|
|
5
|
+
# the Free Software Foundation, either version 3 of the License, or #
|
|
6
|
+
# (at your option) any later version. #
|
|
7
|
+
# #
|
|
8
|
+
# This program is distributed in the hope that it will be useful, #
|
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
11
|
+
# GNU General Public License for more details. #
|
|
12
|
+
# #
|
|
13
|
+
# You should have received a copy of the GNU General Public License #
|
|
14
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>. #
|
|
15
|
+
# #
|
|
16
|
+
###############################################################################
|
|
17
|
+
|
|
18
|
+
import shutil
|
|
19
|
+
import tempfile
|
|
20
|
+
import unittest
|
|
21
|
+
import matplotlib.pyplot as plt
|
|
22
|
+
from levseq import *
|
|
23
|
+
from levseq.run_levseq import process_ref_csv
|
|
24
|
+
u = SciUtil()
|
|
25
|
+
import math
|
|
26
|
+
|
|
27
|
+
class TestClass(unittest.TestCase):
|
|
28
|
+
|
|
29
|
+
@classmethod
|
|
30
|
+
def setup_class(self):
|
|
31
|
+
local = True
|
|
32
|
+
# Create a base object since it will be the same for all the tests
|
|
33
|
+
THIS_DIR = os.path.dirname(os.path.abspath(__file__))
|
|
34
|
+
|
|
35
|
+
self.data_dir = os.path.join(THIS_DIR, 'test_data/')
|
|
36
|
+
if local:
|
|
37
|
+
self.tmp_dir = os.path.join(THIS_DIR, 'test_data/tmp/')
|
|
38
|
+
if os.path.exists(self.tmp_dir):
|
|
39
|
+
shutil.rmtree(self.tmp_dir)
|
|
40
|
+
os.mkdir(self.tmp_dir)
|
|
41
|
+
else:
|
|
42
|
+
self.tmp_dir = tempfile.mkdtemp(prefix='test_data')
|
|
43
|
+
|
|
44
|
+
@classmethod
|
|
45
|
+
def teardown_class(self):
|
|
46
|
+
shutil.rmtree(self.tmp_dir)
|
|
47
|
+
|
|
48
|
+
class TestDeploy(TestClass):
|
|
49
|
+
|
|
50
|
+
def test_deploy(self):
|
|
51
|
+
cmd_list = [
|
|
52
|
+
'docker', # Needs to be installed as vina.
|
|
53
|
+
'run',
|
|
54
|
+
'--rm',
|
|
55
|
+
'-v',
|
|
56
|
+
f'{os.getcwd()}:/levseq_results',
|
|
57
|
+
'levseq',
|
|
58
|
+
'test_deploy',
|
|
59
|
+
'test_data/laragen_run/levseq-1.2.7/',
|
|
60
|
+
'test_data/laragen_run/20241116-LevSeq-Review-Validation-levseq_ref.csv'
|
|
61
|
+
]
|
|
62
|
+
# ToDo: add in scoring function for ad4
|
|
63
|
+
cmd_return = subprocess.run(cmd_list, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
|
|
64
|
+
print(cmd_return.stdout, cmd_return)
|
|
65
|
+
|
|
66
|
+
def test_variant_calling(self):
|
|
67
|
+
# Take as input the demultiplexed fastq files and the reference csv file
|
|
68
|
+
cl_args = {'skip_demultiplexing': True, 'skip_variantcalling': False}
|
|
69
|
+
cl_args["name"] = 'test_deploy'
|
|
70
|
+
cl_args['path'] = 'test_data/laragen_run/levseq-1.2.7/'
|
|
71
|
+
cl_args["summary"] = 'test_data/laragen_run/20241116-LevSeq-Review-Validation-levseq_ref.csv'
|
|
72
|
+
variant_df, ref_df = process_ref_csv(cl_args)
|
|
73
|
+
# Now we want to check all the variants are the same as in the original case:
|
|
74
|
+
checked_variants_df = pd.read_csv('test_data/laragen_run/levseq-1.2.7/variants_gold_standard.csv')
|
|
75
|
+
checked_variants = checked_variants_df['Variant'].values
|
|
76
|
+
checked_sig = checked_variants_df['P adj. value'].values
|
|
77
|
+
i = 0
|
|
78
|
+
for variant, pval in variant_df[['Variant', 'P adj. value']].values:
|
|
79
|
+
print(variant, checked_variants[i])
|
|
80
|
+
if checked_variants[i]:
|
|
81
|
+
if variant:
|
|
82
|
+
assert variant == checked_variants[i]
|
|
83
|
+
# if pval < 0.05:
|
|
84
|
+
# assert checked_sig[i] < 0.05
|
|
85
|
+
# elif math.isnan(pval):
|
|
86
|
+
# assert math.isnan(checked_sig[i])
|
|
87
|
+
# else:
|
|
88
|
+
# assert checked_sig[i] >= 0.05
|
|
89
|
+
print(pval, checked_sig[i])
|
|
90
|
+
i += 1
|
|
91
|
+
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|