levseq 1.4.1__tar.gz → 1.4.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {levseq-1.4.1/levseq.egg-info → levseq-1.4.3}/PKG-INFO +52 -3
- {levseq-1.4.1 → levseq-1.4.3}/README.md +38 -1
- {levseq-1.4.1 → levseq-1.4.3}/levseq/__init__.py +1 -1
- {levseq-1.4.1 → levseq-1.4.3}/levseq/interface.py +60 -1
- {levseq-1.4.1 → levseq-1.4.3}/levseq/run_levseq.py +1 -0
- {levseq-1.4.1 → levseq-1.4.3}/levseq/utils.py +6 -4
- {levseq-1.4.1 → levseq-1.4.3/levseq.egg-info}/PKG-INFO +52 -3
- {levseq-1.4.1 → levseq-1.4.3}/tests/test_variant_calling.py +1 -1
- {levseq-1.4.1 → levseq-1.4.3}/LICENSE +0 -0
- {levseq-1.4.1 → levseq-1.4.3}/MANIFEST.in +0 -0
- {levseq-1.4.1 → levseq-1.4.3}/levseq/IO_processor.py +0 -0
- {levseq-1.4.1 → levseq-1.4.3}/levseq/barcoding/__init__.py +0 -0
- {levseq-1.4.1 → levseq-1.4.3}/levseq/barcoding/demultiplex +0 -0
- {levseq-1.4.1 → levseq-1.4.3}/levseq/barcoding/demultiplex-arm64 +0 -0
- {levseq-1.4.1 → levseq-1.4.3}/levseq/barcoding/demultiplex-x86 +0 -0
- {levseq-1.4.1 → levseq-1.4.3}/levseq/barcoding/minion_barcodes.fasta +0 -0
- {levseq-1.4.1 → levseq-1.4.3}/levseq/basecaller.py +0 -0
- {levseq-1.4.1 → levseq-1.4.3}/levseq/cmd.py +0 -0
- {levseq-1.4.1 → levseq-1.4.3}/levseq/coordinates.py +0 -0
- {levseq-1.4.1 → levseq-1.4.3}/levseq/filter_orientation.py +0 -0
- {levseq-1.4.1 → levseq-1.4.3}/levseq/globals.py +0 -0
- {levseq-1.4.1 → levseq-1.4.3}/levseq/parser.py +0 -0
- {levseq-1.4.1 → levseq-1.4.3}/levseq/screen.py +0 -0
- {levseq-1.4.1 → levseq-1.4.3}/levseq/seqfit.py +0 -0
- {levseq-1.4.1 → levseq-1.4.3}/levseq/simulation.py +0 -0
- {levseq-1.4.1 → levseq-1.4.3}/levseq/user.py +0 -0
- {levseq-1.4.1 → levseq-1.4.3}/levseq/variantcaller.py +0 -0
- {levseq-1.4.1 → levseq-1.4.3}/levseq/visualization.py +0 -0
- {levseq-1.4.1 → levseq-1.4.3}/levseq.egg-info/SOURCES.txt +0 -0
- {levseq-1.4.1 → levseq-1.4.3}/levseq.egg-info/dependency_links.txt +0 -0
- {levseq-1.4.1 → levseq-1.4.3}/levseq.egg-info/entry_points.txt +0 -0
- {levseq-1.4.1 → levseq-1.4.3}/levseq.egg-info/requires.txt +0 -0
- {levseq-1.4.1 → levseq-1.4.3}/levseq.egg-info/top_level.txt +0 -0
- {levseq-1.4.1 → levseq-1.4.3}/setup.cfg +0 -0
- {levseq-1.4.1 → levseq-1.4.3}/setup.py +0 -0
- {levseq-1.4.1 → levseq-1.4.3}/tests/test_copy_fastq.py +0 -0
- {levseq-1.4.1 → levseq-1.4.3}/tests/test_demultiplex_docker.py +0 -0
- {levseq-1.4.1 → levseq-1.4.3}/tests/test_deploy.py +0 -0
- {levseq-1.4.1 → levseq-1.4.3}/tests/test_opligopools.py +0 -0
- {levseq-1.4.1 → levseq-1.4.3}/tests/test_seqfitvis.py +0 -0
- {levseq-1.4.1 → levseq-1.4.3}/tests/test_seqs.py +0 -0
- {levseq-1.4.1 → levseq-1.4.3}/tests/test_statistics.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: levseq
|
|
3
|
-
Version: 1.4.
|
|
3
|
+
Version: 1.4.3
|
|
4
4
|
Home-page: https://github.com/fhalab/levseq/
|
|
5
5
|
Author: Yueming Long, Ariane Mora, Francesca-Zhoufan Li, Emre Gursoy
|
|
6
6
|
Author-email: ylong@caltech.edu
|
|
@@ -44,6 +44,18 @@ Requires-Dist: scikit-learn
|
|
|
44
44
|
Requires-Dist: statsmodels
|
|
45
45
|
Requires-Dist: tqdm
|
|
46
46
|
Requires-Dist: biopandas
|
|
47
|
+
Dynamic: author
|
|
48
|
+
Dynamic: author-email
|
|
49
|
+
Dynamic: classifier
|
|
50
|
+
Dynamic: description
|
|
51
|
+
Dynamic: description-content-type
|
|
52
|
+
Dynamic: home-page
|
|
53
|
+
Dynamic: keywords
|
|
54
|
+
Dynamic: license
|
|
55
|
+
Dynamic: license-file
|
|
56
|
+
Dynamic: project-url
|
|
57
|
+
Dynamic: requires-dist
|
|
58
|
+
Dynamic: requires-python
|
|
47
59
|
|
|
48
60
|
# Variant Sequencing with Nanopore (LevSeq)
|
|
49
61
|
|
|
@@ -51,9 +63,21 @@ LevSeq provides a streamlined pipeline for sequencing and analyzing genetic vari
|
|
|
51
63
|
|
|
52
64
|

|
|
53
65
|
Figure 1: Overview of the LevSeq variant sequencing workflow using Nanopore technology. This diagram illustrates the key steps in the process, from sample preparation to data analysis and visualization.
|
|
66
|
+
## Notes
|
|
67
|
+
|
|
68
|
+
LevSeq was designed for epPCR and SSM experiments; however, we are currently extending it to work for other enzyme engineering designs as well. The following features are under development:
|
|
69
|
+
|
|
70
|
+
1. Insertion handling (see version 1.4.3) - thanks to Brian Zhong for his contributions to this section!
|
|
71
|
+
2. Gene calling (handling different genes, use the `--oligopool` flag)
|
|
72
|
+
|
|
73
|
+
If you notice any issues with new features or have adapted the LevSeq code for your own use cases, we would love community contributions! Please submit either an issue or a pull request, and we will aim to incorporate the changes.
|
|
54
74
|
|
|
55
75
|
## Quick Start
|
|
56
76
|
|
|
77
|
+
Note the current stable version is: `1.4.2`, the latest version is `1.4.3`.
|
|
78
|
+
|
|
79
|
+
Stable releases are made available via Docker and pip. For the latest version, please clone the repo and install locally (see *Local development or install of latest version* below).
|
|
80
|
+
|
|
57
81
|
### Docker Installation (Recommended)
|
|
58
82
|
|
|
59
83
|
1. Install Docker: [https://docs.docker.com/engine/install/](https://docs.docker.com/engine/install/)
|
|
@@ -69,7 +93,10 @@ Figure 1: Overview of the LevSeq variant sequencing workflow using Nanopore tech
|
|
|
69
93
|
```bash
|
|
70
94
|
docker run --rm -v "/full/path/to/data:/levseq_results" yueminglong/levseq:levseq-1.4-arm64 my_experiment levseq_results/ levseq_results/ref.csv
|
|
71
95
|
```
|
|
72
|
-
|
|
96
|
+
4. Connect function data to your sequence data
|
|
97
|
+
```bash
|
|
98
|
+
docker run --rm -v "/full/path/to/data:/levseq_results" yueminglong/levseq:levseq-1.4-arm64 my_experiment levseq_results/ levseq_results/ref.csv --fitness_files "levseq_results/20250712_epPCR_Q06714_37.csv,levseq_results/20250712_epPCR_Q06714_39.csv,levseq_results/20250712_epPCR_Q06714_40.csv" --smiles 'O=P(OC1=CC=CC=C1)(OC2=CC=CC=C2)OC3=CC=CC=C3>>O=P(O)(OC4=CC=CC=C4)OC5=CC=CC=C5' --compound dPPi --variant_df "levseq_results/visualization_partial.csv"
|
|
99
|
+
```
|
|
73
100
|
### Pip Installation (Mac/Linux only)
|
|
74
101
|
|
|
75
102
|
**IMPORTANT**: On Mac M-series chips (M1-M4), gcc 13 and 14 are **REQUIRED**:
|
|
@@ -98,6 +125,18 @@ brew install gcc@13 gcc@14
|
|
|
98
125
|
levseq my_experiment /path/to/data/ /path/to/ref.csv
|
|
99
126
|
```
|
|
100
127
|
|
|
128
|
+
5. Combine function data:
|
|
129
|
+
```bash
|
|
130
|
+
levseq my_experiment /path/to/data/ /path/to/ref.csv --fitness_files "LCMS_file_{barcode1}.csv,LCMS_file_{barcode2}.csv" --smiles 'reaction_smiles_string' --compound "name_of_compound_in_LCMS_file" --variant_df "visualization_partial.csv"
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
Note: for function data we currently expect an LCMS file with, e.g., the following columns:
|
|
134
|
+
- `Sample Vial Number` (corresponding to the well that the sample was from).
|
|
135
|
+
- `Area` (which becomes fitness value).
|
|
136
|
+
- `Compound Name` which is the name of the compound we filter for that is passed as a parameter.
|
|
137
|
+
- The file name must end in `_X.csv`, where `X` is the barcode number used to match that file to your plate, e.g. if you ran LevSeq with barcode 33 for plate 2, the fitness file for plate 2 must end in `_33.csv` (e.g. `some_fitness_for_plate_2_33.csv`).
|
|
138
|
+
|
|
139
|
+
|
|
101
140
|
## Data and Visualization
|
|
102
141
|
|
|
103
142
|
- **Test Data**: Sample data is available on Zenodo [](https://doi.org/10.5281/zenodo.13694463)
|
|
@@ -168,6 +207,16 @@ For the wet lab protocol:
|
|
|
168
207
|
- **Advanced Usage**: See the [manuscript notebook](https://github.com/fhalab/LevSeq/blob/main/manuscript/notebooks/epPCR_10plates.ipynb)
|
|
169
208
|
- **Troubleshooting**: See our [computational protocols wiki](https://github.com/fhalab/LevSeq/wiki/Computational-protocols)
|
|
170
209
|
|
|
210
|
+
### Local development or install of latest version
|
|
211
|
+
|
|
212
|
+
```
|
|
213
|
+
conda create --name levseq python=3.10
|
|
214
|
+
git clone git@github.com:fhalab/LevSeq.git
|
|
215
|
+
cd LevSeq
|
|
216
|
+
python setup.py sdist bdist_wheel
|
|
217
|
+
pip install dist/levseq-1.4.3.tar.gz
|
|
218
|
+
```
|
|
219
|
+
|
|
171
220
|
## Citing LevSeq
|
|
172
221
|
|
|
173
222
|
If you find LevSeq useful, please cite our paper:
|
|
@@ -4,9 +4,21 @@ LevSeq provides a streamlined pipeline for sequencing and analyzing genetic vari
|
|
|
4
4
|
|
|
5
5
|

|
|
6
6
|
Figure 1: Overview of the LevSeq variant sequencing workflow using Nanopore technology. This diagram illustrates the key steps in the process, from sample preparation to data analysis and visualization.
|
|
7
|
+
## Notes
|
|
8
|
+
|
|
9
|
+
LevSeq was designed for epPCR and SSM experiments; however, we are currently extending it to work for other enzyme engineering designs as well. The following features are under development:
|
|
10
|
+
|
|
11
|
+
1. Insertion handling (see version 1.4.3) - thanks to Brian Zhong for his contributions to this section!
|
|
12
|
+
2. Gene calling (handling different genes, use the `--oligopool` flag)
|
|
13
|
+
|
|
14
|
+
If you notice any issues with new features or have adapted the LevSeq code for your own use cases, we would love community contributions! Please submit either an issue or a pull request, and we will aim to incorporate the changes.
|
|
7
15
|
|
|
8
16
|
## Quick Start
|
|
9
17
|
|
|
18
|
+
Note the current stable version is: `1.4.2`, the latest version is `1.4.3`.
|
|
19
|
+
|
|
20
|
+
Stable releases are made available via Docker and pip. For the latest version, please clone the repo and install locally (see *Local development or install of latest version* below).
|
|
21
|
+
|
|
10
22
|
### Docker Installation (Recommended)
|
|
11
23
|
|
|
12
24
|
1. Install Docker: [https://docs.docker.com/engine/install/](https://docs.docker.com/engine/install/)
|
|
@@ -22,7 +34,10 @@ Figure 1: Overview of the LevSeq variant sequencing workflow using Nanopore tech
|
|
|
22
34
|
```bash
|
|
23
35
|
docker run --rm -v "/full/path/to/data:/levseq_results" yueminglong/levseq:levseq-1.4-arm64 my_experiment levseq_results/ levseq_results/ref.csv
|
|
24
36
|
```
|
|
25
|
-
|
|
37
|
+
4. Connect function data to your sequence data
|
|
38
|
+
```bash
|
|
39
|
+
docker run --rm -v "/full/path/to/data:/levseq_results" yueminglong/levseq:levseq-1.4-arm64 my_experiment levseq_results/ levseq_results/ref.csv --fitness_files "levseq_results/20250712_epPCR_Q06714_37.csv,levseq_results/20250712_epPCR_Q06714_39.csv,levseq_results/20250712_epPCR_Q06714_40.csv" --smiles 'O=P(OC1=CC=CC=C1)(OC2=CC=CC=C2)OC3=CC=CC=C3>>O=P(O)(OC4=CC=CC=C4)OC5=CC=CC=C5' --compound dPPi --variant_df "levseq_results/visualization_partial.csv"
|
|
40
|
+
```
|
|
26
41
|
### Pip Installation (Mac/Linux only)
|
|
27
42
|
|
|
28
43
|
**IMPORTANT**: On Mac M-series chips (M1-M4), gcc 13 and 14 are **REQUIRED**:
|
|
@@ -51,6 +66,18 @@ brew install gcc@13 gcc@14
|
|
|
51
66
|
levseq my_experiment /path/to/data/ /path/to/ref.csv
|
|
52
67
|
```
|
|
53
68
|
|
|
69
|
+
5. Combine function data:
|
|
70
|
+
```bash
|
|
71
|
+
levseq my_experiment /path/to/data/ /path/to/ref.csv --fitness_files "LCMS_file_{barcode1}.csv,LCMS_file_{barcode2}.csv" --smiles 'reaction_smiles_string' --compound "name_of_compound_in_LCMS_file" --variant_df "visualization_partial.csv"
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
Note: for function data we currently expect an LCMS file with, e.g., the following columns:
|
|
75
|
+
- `Sample Vial Number` (corresponding to the well that the sample was from).
|
|
76
|
+
- `Area` (which becomes fitness value).
|
|
77
|
+
- `Compound Name` which is the name of the compound we filter for that is passed as a parameter.
|
|
78
|
+
- The file name must end in `_X.csv`, where `X` is the barcode number used to match that file to your plate, e.g. if you ran LevSeq with barcode 33 for plate 2, the fitness file for plate 2 must end in `_33.csv` (e.g. `some_fitness_for_plate_2_33.csv`).
|
|
79
|
+
|
|
80
|
+
|
|
54
81
|
## Data and Visualization
|
|
55
82
|
|
|
56
83
|
- **Test Data**: Sample data is available on Zenodo [](https://doi.org/10.5281/zenodo.13694463)
|
|
@@ -121,6 +148,16 @@ For the wet lab protocol:
|
|
|
121
148
|
- **Advanced Usage**: See the [manuscript notebook](https://github.com/fhalab/LevSeq/blob/main/manuscript/notebooks/epPCR_10plates.ipynb)
|
|
122
149
|
- **Troubleshooting**: See our [computational protocols wiki](https://github.com/fhalab/LevSeq/wiki/Computational-protocols)
|
|
123
150
|
|
|
151
|
+
### Local development or install of latest version
|
|
152
|
+
|
|
153
|
+
```
|
|
154
|
+
conda create --name levseq python=3.10
|
|
155
|
+
git clone git@github.com:fhalab/LevSeq.git
|
|
156
|
+
cd LevSeq
|
|
157
|
+
python setup.py sdist bdist_wheel
|
|
158
|
+
pip install dist/levseq-1.4.3.tar.gz
|
|
159
|
+
```
|
|
160
|
+
|
|
124
161
|
## Citing LevSeq
|
|
125
162
|
|
|
126
163
|
If you find LevSeq useful, please cite our paper:
|
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
__title__ = 'levseq'
|
|
19
19
|
__description__ = 'LevSeq nanopore sequencing'
|
|
20
20
|
__url__ = 'https://github.com/fhalab/levseq/'
|
|
21
|
-
__version__ = '1.4.
|
|
21
|
+
__version__ = '1.4.3'
|
|
22
22
|
__author__ = 'Yueming Long, Ariane Mora, Francesca-Zhoufan Li, Emre Gursoy'
|
|
23
23
|
__author_email__ = 'ylong@caltech.edu'
|
|
24
24
|
__license__ = 'GPL3'
|
|
@@ -21,6 +21,8 @@ Contain argument parsers used for command line interface and web interface
|
|
|
21
21
|
import os
|
|
22
22
|
import tqdm
|
|
23
23
|
import argparse
|
|
24
|
+
import pandas as pd
|
|
25
|
+
|
|
24
26
|
# Import local packages
|
|
25
27
|
from levseq.run_levseq import run_LevSeq
|
|
26
28
|
|
|
@@ -68,16 +70,73 @@ def build_cli_parser():
|
|
|
68
70
|
help="Whether this experiment came from an oligopool, default is false.")
|
|
69
71
|
optional_args_group.add_argument("--show_msa",
|
|
70
72
|
default=False,
|
|
71
|
-
help="Skip showing msa")
|
|
73
|
+
help="Skip showing msa")
|
|
74
|
+
# if cl_args.get('fitness_files') and cl_args.get('smiles'):
|
|
75
|
+
optional_args_group.add_argument("--fitness_files",
|
|
76
|
+
default=None,
|
|
77
|
+
help="A comma separated list of fitness files (full path) with string quotation marks around them.")
|
|
78
|
+
optional_args_group.add_argument("--smiles",
|
|
79
|
+
default=None,
|
|
80
|
+
help="A smiles string of the reaction with quotation marks around.")
|
|
81
|
+
optional_args_group.add_argument("--compound",
|
|
82
|
+
default=None,
|
|
83
|
+
help="The compound in the fitness files (e.g. pDT or pdt - case sensitive).")
|
|
84
|
+
optional_args_group.add_argument("--variant_df",
|
|
85
|
+
default=None,
|
|
86
|
+
help="The variant dataframe to combine with fitness data.")
|
|
72
87
|
return parser
|
|
73
88
|
|
|
74
89
|
|
|
90
|
+
def combine_seq_func_data(cl_args):
    """
    Combine LevSeq variant calls with function (fitness) data from LCMS files.

    Each fitness file is read, restricted to the rows whose ``Compound Name`` equals the
    requested compound, and tagged with a ``{barcode}_{well}`` key. The barcode is the text
    after the last ``_`` in the file name (before the extension) and the well is the text after
    the last ``-`` in ``Sample Vial Number``. The variant table gets the same key built from its
    ``barcode_plate`` and ``Well`` columns and the two are left-joined, so every variant row is
    kept. The result is written next to the variant file as ``<variant file stem>_seqfunc.csv``.

    Example:
        levseq my_experiment data/ ref.csv --fitness_files "20250712_epPCR_Q06714_37.csv,20250712_epPCR_Q06714_38.csv"
        --smiles 'O=P(OC1=CC=CC=C1)(OC2=CC=CC=C2)OC3=CC=CC=C3>>O=P(O)(OC4=CC=CC=C4)OC5=CC=CC=C5'
        --compound dPPi --variant_df visualization_partial.csv

    :param cl_args: dict of parsed command line arguments. Keys read here: ``fitness_files``
        (comma separated file paths), ``smiles`` (reaction smiles, ``reactants>>products``),
        ``variant_df`` (path to the variant CSV) and ``compound`` (optional, defaults to ``'pdt'``).
    :return: the combined DataFrame indexed by ``barcode_well``, or None when any of
        ``fitness_files``, ``smiles`` or ``variant_df`` is missing.
    :raises ValueError: if ``fitness_files`` contains no file names (e.g. only commas).
    """
    fitness_files = cl_args.get('fitness_files')
    reaction_smiles = cl_args.get('smiles')
    variant_filename = cl_args.get('variant_df')
    # All three are required to combine sequence and function data.
    if not (fitness_files and reaction_smiles and variant_filename):
        return None

    variant_df = pd.read_csv(variant_filename)
    compound_name = cl_args.get('compound') or 'pdt'
    print(fitness_files, compound_name)

    # Ignore empty entries so a trailing comma or stray spaces do not become a bogus file name.
    function_files = [f.strip() for f in fitness_files.split(',') if f.strip()]
    if not function_files:
        raise ValueError(f'No fitness files found in: {fitness_files!r}')

    function_dfs = []
    for function_file in function_files:
        # The barcode has to be the last _[barcode] of the file name; use the base name only so
        # underscores or ".csv" in a directory name cannot leak into the barcode.
        file_stem = os.path.splitext(os.path.basename(function_file))[0]
        barcode = file_stem.split('_')[-1]
        function_df = pd.read_csv(function_file)
        # Column headers may contain line breaks; flatten them so the names below match.
        function_df.columns = [c.replace('\n', ' ') for c in function_df.columns]
        function_df['function_well'] = [x.split('-')[-1] if isinstance(x, str) else None
                                        for x in function_df['Sample Vial Number'].values]
        function_df['function_barcode_plate'] = barcode
        # Keep only the requested compound; copy so the assignments below act on an
        # independent frame rather than a slice of the original.
        function_df = function_df[function_df['Compound Name'] == compound_name].copy()
        # Convert it to numeric (unparsable areas become NaN)
        function_df['Area'] = pd.to_numeric(function_df['Area'], errors='coerce')
        function_df['barcode_well'] = [f'{p}_{w}' for w, p in
                                       function_df[['function_well', 'function_barcode_plate']].values]
        function_df['filename'] = function_file
        print(function_df.head())
        function_dfs.append(function_df)
    # Concatenate once rather than growing a DataFrame inside the loop.
    all_function_df = pd.concat(function_dfs)

    # Join this with the variant_df barcode plate
    variant_df['barcode_well'] = [f'{p}_{w}' for w, p in variant_df[['Well', 'barcode_plate']].values]
    variant_df.set_index('barcode_well', inplace=True)
    all_function_df.set_index('barcode_well', inplace=True)
    variant_df = variant_df.join(all_function_df, how='left')

    # The smiles has to be the reaction smiles; the product is whatever follows the last '>>'.
    variant_df['smiles_string'] = reaction_smiles.split('>>')[-1]
    variant_df['reaction_smiles'] = reaction_smiles
    variant_df.columns = [c.lower().replace(' ', '_') for c in variant_df.columns]
    variant_df.rename(columns={'area': 'fitness_value'}, inplace=True)

    # Build the output name from the stem so an input without a ".csv" suffix is never overwritten.
    output_root = os.path.splitext(variant_filename)[0]
    variant_df.to_csv(f'{output_root}_seqfunc.csv')
    return variant_df
|
|
130
|
+
|
|
75
131
|
# Execute LevSeq
|
|
76
132
|
def execute_LevSeq():
|
|
77
133
|
# Build parser
|
|
78
134
|
parser = build_cli_parser()
|
|
79
135
|
# Parse the arguments
|
|
80
136
|
CL_ARGS = vars(parser.parse_args())
|
|
137
|
+
if CL_ARGS.get('fitness_files') and CL_ARGS.get('smiles') and CL_ARGS.get('variant_df'):
|
|
138
|
+
print('Combining LevSeq')
|
|
139
|
+
return combine_seq_func_data(CL_ARGS)
|
|
81
140
|
# Set up progres bar
|
|
82
141
|
tqdm_fn = tqdm.tqdm
|
|
83
142
|
# Run LevSeq
|
|
@@ -205,7 +205,7 @@ def calculate_mutation_significance_across_well(seq_df):
|
|
|
205
205
|
seq_df.at[i, 'p(g)'] = p_g
|
|
206
206
|
seq_df.at[i, 'p(c)'] = p_c
|
|
207
207
|
seq_df.at[i, 'p(n)'] = p_n
|
|
208
|
-
seq_df.at[i, 'p(i)'] =
|
|
208
|
+
seq_df.at[i, 'p(i)'] = p_i
|
|
209
209
|
seq_df.at[i, 'p_value'] = p_value
|
|
210
210
|
seq_df.at[i, 'percent_most_freq_mutation'] = val
|
|
211
211
|
seq_df.at[i, 'most_frequent'] = actual_seq
|
|
@@ -324,6 +324,8 @@ def get_reads_for_well(parent_name, bam_file_path: str, ref_str: str, msa_path=N
|
|
|
324
324
|
'C', 'p(c)', 'N', 'p(n)', 'I', 'p(i)', 'Warnings']
|
|
325
325
|
return calculate_mutation_significance_across_well(seq_df), alignment_count
|
|
326
326
|
return None, 0
|
|
327
|
+
|
|
328
|
+
|
|
327
329
|
def make_row_from_read_pileup_across_well(well_df, ref_str, label, insert_map):
|
|
328
330
|
"""
|
|
329
331
|
Given a pileup of reads, we want to get some summary information about that sequence
|
|
@@ -349,12 +351,12 @@ def make_row_from_read_pileup_across_well(well_df, ref_str, label, insert_map):
|
|
|
349
351
|
warning = f'WARNING: INSERT.'
|
|
350
352
|
rows.append([label, col, ref_seq, actual_seq, freq_non_ref, total_other, total_reads, 1.0, 0.0,
|
|
351
353
|
len(vc[vc == 'A']), 1.0, len(vc[vc == 'T']), 1.0, len(vc[vc == 'G']), 1.0,
|
|
352
|
-
len(vc[vc == 'C']), 1.0, len(vc[vc == '-']), 1.0, len(
|
|
354
|
+
len(vc[vc == 'C']), 1.0, len(vc[vc == '-']), 1.0, len(vc[vc == 'I']),
|
|
353
355
|
1.0, warning])
|
|
354
|
-
|
|
356
|
+
elif ref_seq != '-':
|
|
355
357
|
rows.append([label, col, ref_seq, actual_seq, freq_non_ref, total_other, total_reads, 1.0, 0.0,
|
|
356
358
|
len(vc[vc == 'A']), 1.0, len(vc[vc == 'T']), 1.0, len(vc[vc == 'G']), 1.0,
|
|
357
|
-
len(vc[vc == 'C']), 1.0, len(vc[vc == '-']), 1.0,
|
|
359
|
+
len(vc[vc == 'C']), 1.0, len(vc[vc == '-']), 1.0, len(vc[vc == 'I']),
|
|
358
360
|
1.0, warning])
|
|
359
361
|
return rows
|
|
360
362
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: levseq
|
|
3
|
-
Version: 1.4.
|
|
3
|
+
Version: 1.4.3
|
|
4
4
|
Home-page: https://github.com/fhalab/levseq/
|
|
5
5
|
Author: Yueming Long, Ariane Mora, Francesca-Zhoufan Li, Emre Gursoy
|
|
6
6
|
Author-email: ylong@caltech.edu
|
|
@@ -44,6 +44,18 @@ Requires-Dist: scikit-learn
|
|
|
44
44
|
Requires-Dist: statsmodels
|
|
45
45
|
Requires-Dist: tqdm
|
|
46
46
|
Requires-Dist: biopandas
|
|
47
|
+
Dynamic: author
|
|
48
|
+
Dynamic: author-email
|
|
49
|
+
Dynamic: classifier
|
|
50
|
+
Dynamic: description
|
|
51
|
+
Dynamic: description-content-type
|
|
52
|
+
Dynamic: home-page
|
|
53
|
+
Dynamic: keywords
|
|
54
|
+
Dynamic: license
|
|
55
|
+
Dynamic: license-file
|
|
56
|
+
Dynamic: project-url
|
|
57
|
+
Dynamic: requires-dist
|
|
58
|
+
Dynamic: requires-python
|
|
47
59
|
|
|
48
60
|
# Variant Sequencing with Nanopore (LevSeq)
|
|
49
61
|
|
|
@@ -51,9 +63,21 @@ LevSeq provides a streamlined pipeline for sequencing and analyzing genetic vari
|
|
|
51
63
|
|
|
52
64
|

|
|
53
65
|
Figure 1: Overview of the LevSeq variant sequencing workflow using Nanopore technology. This diagram illustrates the key steps in the process, from sample preparation to data analysis and visualization.
|
|
66
|
+
## Notes
|
|
67
|
+
|
|
68
|
+
LevSeq was designed for epPCR and SSM experiments; however, we are currently extending it to work for other enzyme engineering designs as well. The following features are under development:
|
|
69
|
+
|
|
70
|
+
1. Insertion handling (see version 1.4.3) - thanks to Brian Zhong for his contributions to this section!
|
|
71
|
+
2. Gene calling (handling different genes, use the `--oligopool` flag)
|
|
72
|
+
|
|
73
|
+
If you notice any issues with new features or have adapted the LevSeq code for your own use cases, we would love community contributions! Please submit either an issue or a pull request, and we will aim to incorporate the changes.
|
|
54
74
|
|
|
55
75
|
## Quick Start
|
|
56
76
|
|
|
77
|
+
Note the current stable version is: `1.4.2`, the latest version is `1.4.3`.
|
|
78
|
+
|
|
79
|
+
Stable releases are made available via Docker and pip. For the latest version, please clone the repo and install locally (see *Local development or install of latest version* below).
|
|
80
|
+
|
|
57
81
|
### Docker Installation (Recommended)
|
|
58
82
|
|
|
59
83
|
1. Install Docker: [https://docs.docker.com/engine/install/](https://docs.docker.com/engine/install/)
|
|
@@ -69,7 +93,10 @@ Figure 1: Overview of the LevSeq variant sequencing workflow using Nanopore tech
|
|
|
69
93
|
```bash
|
|
70
94
|
docker run --rm -v "/full/path/to/data:/levseq_results" yueminglong/levseq:levseq-1.4-arm64 my_experiment levseq_results/ levseq_results/ref.csv
|
|
71
95
|
```
|
|
72
|
-
|
|
96
|
+
4. Connect function data to your sequence data
|
|
97
|
+
```bash
|
|
98
|
+
docker run --rm -v "/full/path/to/data:/levseq_results" yueminglong/levseq:levseq-1.4-arm64 my_experiment levseq_results/ levseq_results/ref.csv --fitness_files "levseq_results/20250712_epPCR_Q06714_37.csv,levseq_results/20250712_epPCR_Q06714_39.csv,levseq_results/20250712_epPCR_Q06714_40.csv" --smiles 'O=P(OC1=CC=CC=C1)(OC2=CC=CC=C2)OC3=CC=CC=C3>>O=P(O)(OC4=CC=CC=C4)OC5=CC=CC=C5' --compound dPPi --variant_df "levseq_results/visualization_partial.csv"
|
|
99
|
+
```
|
|
73
100
|
### Pip Installation (Mac/Linux only)
|
|
74
101
|
|
|
75
102
|
**IMPORTANT**: On Mac M-series chips (M1-M4), gcc 13 and 14 are **REQUIRED**:
|
|
@@ -98,6 +125,18 @@ brew install gcc@13 gcc@14
|
|
|
98
125
|
levseq my_experiment /path/to/data/ /path/to/ref.csv
|
|
99
126
|
```
|
|
100
127
|
|
|
128
|
+
5. Combine function data:
|
|
129
|
+
```bash
|
|
130
|
+
levseq my_experiment /path/to/data/ /path/to/ref.csv --fitness_files "LCMS_file_{barcode1}.csv,LCMS_file_{barcode2}.csv" --smiles 'reaction_smiles_string' --compound "name_of_compound_in_LCMS_file" --variant_df "visualization_partial.csv"
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
Note: for function data we currently expect an LCMS file with, e.g., the following columns:
|
|
134
|
+
- `Sample Vial Number` (corresponding to the well that the sample was from).
|
|
135
|
+
- `Area` (which becomes fitness value).
|
|
136
|
+
- `Compound Name` which is the name of the compound we filter for that is passed as a parameter.
|
|
137
|
+
- The file name must end in `_X.csv`, where `X` is the barcode number used to match that file to your plate, e.g. if you ran LevSeq with barcode 33 for plate 2, the fitness file for plate 2 must end in `_33.csv` (e.g. `some_fitness_for_plate_2_33.csv`).
|
|
138
|
+
|
|
139
|
+
|
|
101
140
|
## Data and Visualization
|
|
102
141
|
|
|
103
142
|
- **Test Data**: Sample data is available on Zenodo [](https://doi.org/10.5281/zenodo.13694463)
|
|
@@ -168,6 +207,16 @@ For the wet lab protocol:
|
|
|
168
207
|
- **Advanced Usage**: See the [manuscript notebook](https://github.com/fhalab/LevSeq/blob/main/manuscript/notebooks/epPCR_10plates.ipynb)
|
|
169
208
|
- **Troubleshooting**: See our [computational protocols wiki](https://github.com/fhalab/LevSeq/wiki/Computational-protocols)
|
|
170
209
|
|
|
210
|
+
### Local development or install of latest version
|
|
211
|
+
|
|
212
|
+
```
|
|
213
|
+
conda create --name levseq python=3.10
|
|
214
|
+
git clone git@github.com:fhalab/LevSeq.git
|
|
215
|
+
cd LevSeq
|
|
216
|
+
python setup.py sdist bdist_wheel
|
|
217
|
+
pip install dist/levseq-1.4.3.tar.gz
|
|
218
|
+
```
|
|
219
|
+
|
|
171
220
|
## Citing LevSeq
|
|
172
221
|
|
|
173
222
|
If you find LevSeq useful, please cite our paper:
|
|
@@ -283,7 +283,7 @@ class TestVariantCalling(TestClass):
|
|
|
283
283
|
|
|
284
284
|
def test_calling_variant_with_insert(self):
|
|
285
285
|
u.dp(["Testing calling variants using SSM with error"])
|
|
286
|
-
|
|
286
|
+
# ToDo: Update this with new calling need a new test for this
|
|
287
287
|
parent_sequence = "ATGAGT"
|
|
288
288
|
mutated_sequence = 'ATGAGT' # Not actually mutated
|
|
289
289
|
parent_name = 'parent'
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|