levseq 1.5__tar.gz → 1.5.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {levseq-1.5/levseq.egg-info → levseq-1.5.1}/PKG-INFO +55 -16
- {levseq-1.5 → levseq-1.5.1}/README.md +54 -15
- levseq-1.5.1/levseq/__init__.py +215 -0
- {levseq-1.5 → levseq-1.5.1}/levseq/run_levseq.py +16 -42
- {levseq-1.5 → levseq-1.5.1/levseq.egg-info}/PKG-INFO +55 -16
- {levseq-1.5 → levseq-1.5.1}/setup.py +2 -2
- levseq-1.5/levseq/__init__.py +0 -34
- {levseq-1.5 → levseq-1.5.1}/LICENSE +0 -0
- {levseq-1.5 → levseq-1.5.1}/MANIFEST.in +0 -0
- {levseq-1.5 → levseq-1.5.1}/levseq/IO_processor.py +0 -0
- {levseq-1.5 → levseq-1.5.1}/levseq/barcoding/__init__.py +0 -0
- {levseq-1.5 → levseq-1.5.1}/levseq/barcoding/demultiplex +0 -0
- {levseq-1.5 → levseq-1.5.1}/levseq/barcoding/demultiplex-arm64 +0 -0
- {levseq-1.5 → levseq-1.5.1}/levseq/barcoding/demultiplex-x86 +0 -0
- {levseq-1.5 → levseq-1.5.1}/levseq/barcoding/minion_barcodes.fasta +0 -0
- {levseq-1.5 → levseq-1.5.1}/levseq/basecaller.py +0 -0
- {levseq-1.5 → levseq-1.5.1}/levseq/cmd.py +0 -0
- {levseq-1.5 → levseq-1.5.1}/levseq/coordinates.py +0 -0
- {levseq-1.5 → levseq-1.5.1}/levseq/filter_orientation.py +0 -0
- {levseq-1.5 → levseq-1.5.1}/levseq/globals.py +0 -0
- {levseq-1.5 → levseq-1.5.1}/levseq/interface.py +0 -0
- {levseq-1.5 → levseq-1.5.1}/levseq/parser.py +0 -0
- {levseq-1.5 → levseq-1.5.1}/levseq/screen.py +0 -0
- {levseq-1.5 → levseq-1.5.1}/levseq/seqfit.py +0 -0
- {levseq-1.5 → levseq-1.5.1}/levseq/simulation.py +0 -0
- {levseq-1.5 → levseq-1.5.1}/levseq/user.py +0 -0
- {levseq-1.5 → levseq-1.5.1}/levseq/utils.py +0 -0
- {levseq-1.5 → levseq-1.5.1}/levseq/variantcaller.py +0 -0
- {levseq-1.5 → levseq-1.5.1}/levseq/visualization.py +0 -0
- {levseq-1.5 → levseq-1.5.1}/levseq.egg-info/SOURCES.txt +0 -0
- {levseq-1.5 → levseq-1.5.1}/levseq.egg-info/dependency_links.txt +0 -0
- {levseq-1.5 → levseq-1.5.1}/levseq.egg-info/entry_points.txt +0 -0
- {levseq-1.5 → levseq-1.5.1}/levseq.egg-info/requires.txt +0 -0
- {levseq-1.5 → levseq-1.5.1}/levseq.egg-info/top_level.txt +0 -0
- {levseq-1.5 → levseq-1.5.1}/setup.cfg +0 -0
- {levseq-1.5 → levseq-1.5.1}/tests/test_copy_fastq.py +0 -0
- {levseq-1.5 → levseq-1.5.1}/tests/test_demultiplex_docker.py +0 -0
- {levseq-1.5 → levseq-1.5.1}/tests/test_deploy.py +0 -0
- {levseq-1.5 → levseq-1.5.1}/tests/test_opligopools.py +0 -0
- {levseq-1.5 → levseq-1.5.1}/tests/test_seqfitvis.py +0 -0
- {levseq-1.5 → levseq-1.5.1}/tests/test_seqs.py +0 -0
- {levseq-1.5 → levseq-1.5.1}/tests/test_statistics.py +0 -0
- {levseq-1.5 → levseq-1.5.1}/tests/test_variant_calling.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: levseq
|
|
3
|
-
Version: 1.5
|
|
3
|
+
Version: 1.5.1
|
|
4
4
|
Home-page: https://github.com/fhalab/levseq/
|
|
5
5
|
Author: Yueming Long, Ariane Mora, Francesca-Zhoufan Li, Emre Gursoy
|
|
6
6
|
Author-email: ylong@caltech.edu
|
|
@@ -74,43 +74,78 @@ Figure 1: Overview of the LevSeq variant sequencing workflow using Nanopore tech
|
|
|
74
74
|
- **Reversed workflow architecture**: LevSeq 2.0 will perform alignment first, then demultiplexing (rather than the current demultiplexing-first approach), resolving issues with forward and reverse read handling
|
|
75
75
|
- **Improved accuracy**: These changes will provide more robust demultiplexing and variant calling across diverse experimental conditions
|
|
76
76
|
|
|
77
|
-
**
|
|
77
|
+
**If you are planning to order barcoded primers now, or need detailed help with troubleshooting or barcode design, please reach out at [lyming2021@gmail.com](mailto:lyming2021@gmail.com).**
|
|
78
78
|
|
|
79
79
|
## Notes
|
|
80
80
|
|
|
81
|
-
LevSeq was designed for epPCR and SSM experiments
|
|
81
|
+
LevSeq was designed for epPCR and SSM experiments. We are also extending it to support additional enzyme engineering designs. Current features under development include:
|
|
82
82
|
|
|
83
|
-
1. Insertion handling (see version 4.1.3)
|
|
84
|
-
2. Gene calling
|
|
83
|
+
1. Insertion handling (see version 4.1.3). Thanks to Brian Zhong for contributions to this section.
|
|
84
|
+
2. Gene calling for experiments with different genes, using the `--oligopool` flag.
|
|
85
85
|
|
|
86
|
-
If you notice
|
|
86
|
+
If you notice issues with new features or have adapted LevSeq for your own use case, community contributions are welcome. Please submit an issue or pull request and we will aim to incorporate the changes.
|
|
87
87
|
|
|
88
88
|
Performance update: demultiplexing now runs in parallel batches of 8 plates and input FASTQs are staged once per run, improving throughput on multi-core systems.
|
|
89
89
|
|
|
90
|
+
Recent repository polish:
|
|
91
|
+
- Faster imports: `import levseq` no longer initializes visualization libraries unless they are needed.
|
|
92
|
+
- Cleaner run startup: plotting dependencies are loaded only when platemaps are generated.
|
|
93
|
+
- Packaging cleanup: bundled barcode files and demultiplex binaries are now declared through package discovery.
|
|
94
|
+
- Git hygiene: local `node_modules/` folders are ignored.
|
|
95
|
+
|
|
90
96
|
## Quick Start
|
|
91
97
|
|
|
92
|
-
Note the current stable version is: `1.5`, the latest version is `1.5`.
|
|
98
|
+
Note: the current stable version is `1.5.1`, and the latest version is `1.5.1`.
|
|
93
99
|
|
|
94
100
|
Stable releases are available via Docker and pip. For the latest version, please clone the repo and install locally (see *Local development or install of latest version* below).
|
|
95
101
|
|
|
102
|
+
### How to Run LevSeq
|
|
103
|
+
|
|
104
|
+
Before running LevSeq, prepare:
|
|
105
|
+
- A folder containing Oxford Nanopore basecalled FASTQ files, usually from a `fastq_pass` directory.
|
|
106
|
+
- A reference CSV file with the columns `barcode_plate`, `name`, and `refseq` (see [Reference File Format](#reference-file-format-refcsv)).
|
|
107
|
+
- A run name, which LevSeq uses as the output folder name.
|
|
108
|
+
|
|
109
|
+
The basic command format is:
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
levseq <run_name> <path_to_fastq_folder> <path_to_ref_csv>
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
Example:
|
|
116
|
+
|
|
117
|
+
```bash
|
|
118
|
+
levseq my_experiment /path/to/fastq_pass /path/to/ref.csv
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
LevSeq writes results to an output folder named after `<run_name>`. Key outputs include `variants.csv`, `visualization_partial.csv`, result CSV files, logs, and interactive platemap HTML files.
|
|
122
|
+
|
|
123
|
+
Common run options:
|
|
124
|
+
- Use `--output /path/to/output` to choose where the run folder is created.
|
|
125
|
+
- Use `--skip_demultiplexing` if reads have already been demultiplexed.
|
|
126
|
+
- Use `--skip_variantcalling` if you only want to run demultiplexing.
|
|
127
|
+
- Use `--oligopool` for experiments with multiple genes or references per barcode plate.
|
|
128
|
+
- Use `--show_msa` to include multiple sequence alignment views in the output.
|
|
129
|
+
|
|
96
130
|
### Docker Installation (Recommended)
|
|
97
131
|
|
|
98
132
|
1. Install Docker: [https://docs.docker.com/engine/install/](https://docs.docker.com/engine/install/)
|
|
99
133
|
2. Pull the appropriate image:
|
|
100
134
|
```bash
|
|
101
135
|
# For Linux/Windows x86 systems:
|
|
102
|
-
docker pull yueminglong/levseq:levseq-1.
|
|
136
|
+
docker pull yueminglong/levseq:levseq-1.5.1-x86
|
|
103
137
|
|
|
104
138
|
# For Mac M-series chips (M1, M2, M3, M4):
|
|
105
|
-
docker pull yueminglong/levseq:levseq-1.
|
|
139
|
+
docker pull yueminglong/levseq:levseq-1.5.1-arm64
|
|
106
140
|
```
|
|
107
141
|
3. Run LevSeq:
|
|
108
142
|
```bash
|
|
109
|
-
docker run --rm -v "/full/path/to/data:/levseq_results" yueminglong/levseq:levseq-1.
|
|
143
|
+
docker run --rm -v "/full/path/to/data:/levseq_results" yueminglong/levseq:levseq-1.5.1-arm64 my_experiment levseq_results/ levseq_results/ref.csv
|
|
110
144
|
```
|
|
145
|
+
Replace `levseq-1.5.1-arm64` with the image tag that matches your platform and release.
|
|
111
146
|
4. Connect function data to your sequence data:
|
|
112
147
|
```bash
|
|
113
|
-
docker run --rm -v "/full/path/to/data:/levseq_results" yueminglong/levseq:levseq-1.
|
|
148
|
+
docker run --rm -v "/full/path/to/data:/levseq_results" yueminglong/levseq:levseq-1.5.1-arm64 my_experiment levseq_results/ levseq_results/ref.csv --fitness_files "levseq_results/20250712_epPCR_Q06714_37.csv,levseq_results/20250712_epPCR_Q06714_39.csv,levseq_results/20250712_epPCR_Q06714_40.csv" --smiles 'O=P(OC1=CC=CC=C1)(OC2=CC=CC=C2)OC3=CC=CC=C3>>O=P(O)(OC4=CC=CC=C4)OC5=CC=CC=C5' --compound dPPi --variant_df "levseq_results/visualization_partial.csv"
|
|
114
149
|
```
|
|
115
150
|
### Pip Installation (Mac/Linux only)
|
|
116
151
|
|
|
@@ -145,11 +180,11 @@ brew install gcc@13 gcc@14
|
|
|
145
180
|
levseq my_experiment /path/to/data/ /path/to/ref.csv --fitness_files "LCMS_file_{barcode1}.csv,LCMS_file_{barcode2}.csv" --smiles 'reaction_smiles_string' --compound "name_of_compound_in_LCMS_file" --variant_df "visualization_partial.csv"
|
|
146
181
|
```
|
|
147
182
|
|
|
148
|
-
|
|
183
|
+
For function data, LevSeq currently expects LCMS files with these columns:
|
|
149
184
|
- `Sample Vial Number` (corresponding to the well that the sample was from).
|
|
150
185
|
- `Area` (which becomes fitness value).
|
|
151
186
|
- `Compound Name` (the name of the compound to filter for, passed via the `--compound` parameter).
|
|
152
|
-
- The
|
|
187
|
+
- The final `_X.csv` suffix should contain the barcode number used to match that sample to the correct plate. For example, if plate 2 used barcode 33, the fitness file should end in `_33.csv`, such as `some_fitness_for_plate_2_33.csv`.
|
|
153
188
|
|
|
154
189
|
|
|
155
190
|
## Data and Visualization
|
|
@@ -188,6 +223,10 @@ For oligopool experiments (multiple proteins per plate), use:
|
|
|
188
223
|
- `--output`: Custom save location (defaults to current directory)
|
|
189
224
|
- `--show_msa`: Show multiple sequence alignment for each well
|
|
190
225
|
- `--oligopool`: Process data as oligopool experiment
|
|
226
|
+
- `--fitness_files`: Comma-separated LCMS or function-data CSV files to join with sequence results
|
|
227
|
+
- `--smiles`: Reaction SMILES string used when joining function data
|
|
228
|
+
- `--compound`: Compound name to filter in the function-data files
|
|
229
|
+
- `--variant_df`: LevSeq variant dataframe to join with function data, usually `visualization_partial.csv`
|
|
191
230
|
|
|
192
231
|
## Step-by-Step Tutorial
|
|
193
232
|
|
|
@@ -198,7 +237,7 @@ For oligopool experiments (multiple proteins per plate), use:
|
|
|
198
237
|
2. **Run LevSeq**:
|
|
199
238
|
```bash
|
|
200
239
|
# Via Docker
|
|
201
|
-
docker run --rm -v "/path/to/data:/levseq_results" yueminglong/levseq:levseq-1.
|
|
240
|
+
docker run --rm -v "/path/to/data:/levseq_results" yueminglong/levseq:levseq-1.5.1-arm64 my_experiment levseq_results/ levseq_results/ref.csv
|
|
202
241
|
|
|
203
242
|
# Via pip
|
|
204
243
|
levseq my_experiment /path/to/data/ /path/to/ref.csv
|
|
@@ -229,7 +268,7 @@ conda create --name levseq python=3.10
|
|
|
229
268
|
git clone git@github.com:fhalab/LevSeq.git
|
|
230
269
|
cd LevSeq
|
|
231
270
|
python setup.py sdist bdist_wheel
|
|
232
|
-
pip install dist/levseq-1.
|
|
271
|
+
pip install dist/levseq-1.5.1.tar.gz
|
|
233
272
|
```
|
|
234
273
|
|
|
235
274
|
## Citing LevSeq
|
|
@@ -248,4 +287,4 @@ If you find LevSeq useful, please cite our paper:
|
|
|
248
287
|
|
|
249
288
|
## Contact
|
|
250
289
|
|
|
251
|
-
|
|
290
|
+
For detailed questions, troubleshooting, barcode design support, or feature requests, email [lyming2021@gmail.com](mailto:lyming2021@gmail.com). Reproducible bugs and public feature discussions are also welcome as GitHub issues.
|
|
@@ -15,43 +15,78 @@ Figure 1: Overview of the LevSeq variant sequencing workflow using Nanopore tech
|
|
|
15
15
|
- **Reversed workflow architecture**: LevSeq 2.0 will perform alignment first, then demultiplexing (rather than the current demultiplexing-first approach), resolving issues with forward and reverse read handling
|
|
16
16
|
- **Improved accuracy**: These changes will provide more robust demultiplexing and variant calling across diverse experimental conditions
|
|
17
17
|
|
|
18
|
-
**
|
|
18
|
+
**If you are planning to order barcoded primers now, or need detailed help with troubleshooting or barcode design, please reach out at [lyming2021@gmail.com](mailto:lyming2021@gmail.com).**
|
|
19
19
|
|
|
20
20
|
## Notes
|
|
21
21
|
|
|
22
|
-
LevSeq was designed for epPCR and SSM experiments
|
|
22
|
+
LevSeq was designed for epPCR and SSM experiments. We are also extending it to support additional enzyme engineering designs. Current features under development include:
|
|
23
23
|
|
|
24
|
-
1. Insertion handling (see version 4.1.3)
|
|
25
|
-
2. Gene calling
|
|
24
|
+
1. Insertion handling (see version 4.1.3). Thanks to Brian Zhong for contributions to this section.
|
|
25
|
+
2. Gene calling for experiments with different genes, using the `--oligopool` flag.
|
|
26
26
|
|
|
27
|
-
If you notice
|
|
27
|
+
If you notice issues with new features or have adapted LevSeq for your own use case, community contributions are welcome. Please submit an issue or pull request and we will aim to incorporate the changes.
|
|
28
28
|
|
|
29
29
|
Performance update: demultiplexing now runs in parallel batches of 8 plates and input FASTQs are staged once per run, improving throughput on multi-core systems.
|
|
30
30
|
|
|
31
|
+
Recent repository polish:
|
|
32
|
+
- Faster imports: `import levseq` no longer initializes visualization libraries unless they are needed.
|
|
33
|
+
- Cleaner run startup: plotting dependencies are loaded only when platemaps are generated.
|
|
34
|
+
- Packaging cleanup: bundled barcode files and demultiplex binaries are now declared through package discovery.
|
|
35
|
+
- Git hygiene: local `node_modules/` folders are ignored.
|
|
36
|
+
|
|
31
37
|
## Quick Start
|
|
32
38
|
|
|
33
|
-
Note the current stable version is: `1.5`, the latest version is `1.5`.
|
|
39
|
+
Note: the current stable version is `1.5.1`, and the latest version is `1.5.1`.
|
|
34
40
|
|
|
35
41
|
Stable releases are available via Docker and pip. For the latest version, please clone the repo and install locally (see *Local development or install of latest version* below).
|
|
36
42
|
|
|
43
|
+
### How to Run LevSeq
|
|
44
|
+
|
|
45
|
+
Before running LevSeq, prepare:
|
|
46
|
+
- A folder containing Oxford Nanopore basecalled FASTQ files, usually from a `fastq_pass` directory.
|
|
47
|
+
- A reference CSV file with the columns `barcode_plate`, `name`, and `refseq` (see [Reference File Format](#reference-file-format-refcsv)).
|
|
48
|
+
- A run name, which LevSeq uses as the output folder name.
|
|
49
|
+
|
|
50
|
+
The basic command format is:
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
levseq <run_name> <path_to_fastq_folder> <path_to_ref_csv>
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
Example:
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
levseq my_experiment /path/to/fastq_pass /path/to/ref.csv
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
LevSeq writes results to an output folder named after `<run_name>`. Key outputs include `variants.csv`, `visualization_partial.csv`, result CSV files, logs, and interactive platemap HTML files.
|
|
63
|
+
|
|
64
|
+
Common run options:
|
|
65
|
+
- Use `--output /path/to/output` to choose where the run folder is created.
|
|
66
|
+
- Use `--skip_demultiplexing` if reads have already been demultiplexed.
|
|
67
|
+
- Use `--skip_variantcalling` if you only want to run demultiplexing.
|
|
68
|
+
- Use `--oligopool` for experiments with multiple genes or references per barcode plate.
|
|
69
|
+
- Use `--show_msa` to include multiple sequence alignment views in the output.
|
|
70
|
+
|
|
37
71
|
### Docker Installation (Recommended)
|
|
38
72
|
|
|
39
73
|
1. Install Docker: [https://docs.docker.com/engine/install/](https://docs.docker.com/engine/install/)
|
|
40
74
|
2. Pull the appropriate image:
|
|
41
75
|
```bash
|
|
42
76
|
# For Linux/Windows x86 systems:
|
|
43
|
-
docker pull yueminglong/levseq:levseq-1.
|
|
77
|
+
docker pull yueminglong/levseq:levseq-1.5.1-x86
|
|
44
78
|
|
|
45
79
|
# For Mac M-series chips (M1, M2, M3, M4):
|
|
46
|
-
docker pull yueminglong/levseq:levseq-1.
|
|
80
|
+
docker pull yueminglong/levseq:levseq-1.5.1-arm64
|
|
47
81
|
```
|
|
48
82
|
3. Run LevSeq:
|
|
49
83
|
```bash
|
|
50
|
-
docker run --rm -v "/full/path/to/data:/levseq_results" yueminglong/levseq:levseq-1.
|
|
84
|
+
docker run --rm -v "/full/path/to/data:/levseq_results" yueminglong/levseq:levseq-1.5.1-arm64 my_experiment levseq_results/ levseq_results/ref.csv
|
|
51
85
|
```
|
|
86
|
+
Replace `levseq-1.5.1-arm64` with the image tag that matches your platform and release.
|
|
52
87
|
4. Connect function data to your sequence data:
|
|
53
88
|
```bash
|
|
54
|
-
docker run --rm -v "/full/path/to/data:/levseq_results" yueminglong/levseq:levseq-1.
|
|
89
|
+
docker run --rm -v "/full/path/to/data:/levseq_results" yueminglong/levseq:levseq-1.5.1-arm64 my_experiment levseq_results/ levseq_results/ref.csv --fitness_files "levseq_results/20250712_epPCR_Q06714_37.csv,levseq_results/20250712_epPCR_Q06714_39.csv,levseq_results/20250712_epPCR_Q06714_40.csv" --smiles 'O=P(OC1=CC=CC=C1)(OC2=CC=CC=C2)OC3=CC=CC=C3>>O=P(O)(OC4=CC=CC=C4)OC5=CC=CC=C5' --compound dPPi --variant_df "levseq_results/visualization_partial.csv"
|
|
55
90
|
```
|
|
56
91
|
### Pip Installation (Mac/Linux only)
|
|
57
92
|
|
|
@@ -86,11 +121,11 @@ brew install gcc@13 gcc@14
|
|
|
86
121
|
levseq my_experiment /path/to/data/ /path/to/ref.csv --fitness_files "LCMS_file_{barcode1}.csv,LCMS_file_{barcode2}.csv" --smiles 'reaction_smiles_string' --compound "name_of_compound_in_LCMS_file" --variant_df "visualization_partial.csv"
|
|
87
122
|
```
|
|
88
123
|
|
|
89
|
-
|
|
124
|
+
For function data, LevSeq currently expects LCMS files with these columns:
|
|
90
125
|
- `Sample Vial Number` (corresponding to the well that the sample was from).
|
|
91
126
|
- `Area` (which becomes fitness value).
|
|
92
127
|
- `Compound Name` (the name of the compound to filter for, passed via the `--compound` parameter).
|
|
93
|
-
- The
|
|
128
|
+
- The final `_X.csv` suffix should contain the barcode number used to match that sample to the correct plate. For example, if plate 2 used barcode 33, the fitness file should end in `_33.csv`, such as `some_fitness_for_plate_2_33.csv`.
|
|
94
129
|
|
|
95
130
|
|
|
96
131
|
## Data and Visualization
|
|
@@ -129,6 +164,10 @@ For oligopool experiments (multiple proteins per plate), use:
|
|
|
129
164
|
- `--output`: Custom save location (defaults to current directory)
|
|
130
165
|
- `--show_msa`: Show multiple sequence alignment for each well
|
|
131
166
|
- `--oligopool`: Process data as oligopool experiment
|
|
167
|
+
- `--fitness_files`: Comma-separated LCMS or function-data CSV files to join with sequence results
|
|
168
|
+
- `--smiles`: Reaction SMILES string used when joining function data
|
|
169
|
+
- `--compound`: Compound name to filter in the function-data files
|
|
170
|
+
- `--variant_df`: LevSeq variant dataframe to join with function data, usually `visualization_partial.csv`
|
|
132
171
|
|
|
133
172
|
## Step-by-Step Tutorial
|
|
134
173
|
|
|
@@ -139,7 +178,7 @@ For oligopool experiments (multiple proteins per plate), use:
|
|
|
139
178
|
2. **Run LevSeq**:
|
|
140
179
|
```bash
|
|
141
180
|
# Via Docker
|
|
142
|
-
docker run --rm -v "/path/to/data:/levseq_results" yueminglong/levseq:levseq-1.
|
|
181
|
+
docker run --rm -v "/path/to/data:/levseq_results" yueminglong/levseq:levseq-1.5.1-arm64 my_experiment levseq_results/ levseq_results/ref.csv
|
|
143
182
|
|
|
144
183
|
# Via pip
|
|
145
184
|
levseq my_experiment /path/to/data/ /path/to/ref.csv
|
|
@@ -170,7 +209,7 @@ conda create --name levseq python=3.10
|
|
|
170
209
|
git clone git@github.com:fhalab/LevSeq.git
|
|
171
210
|
cd LevSeq
|
|
172
211
|
python setup.py sdist bdist_wheel
|
|
173
|
-
pip install dist/levseq-1.
|
|
212
|
+
pip install dist/levseq-1.5.1.tar.gz
|
|
174
213
|
```
|
|
175
214
|
|
|
176
215
|
## Citing LevSeq
|
|
@@ -189,4 +228,4 @@ If you find LevSeq useful, please cite our paper:
|
|
|
189
228
|
|
|
190
229
|
## Contact
|
|
191
230
|
|
|
192
|
-
|
|
231
|
+
For detailed questions, troubleshooting, barcode design support, or feature requests, email [lyming2021@gmail.com](mailto:lyming2021@gmail.com). Reproducible bugs and public feature discussions are also welcome as GitHub issues.
|
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
###############################################################################
|
|
2
|
+
# #
|
|
3
|
+
# This program is free software: you can redistribute it and/or modify #
|
|
4
|
+
# it under the terms of the GNU General Public License as published by #
|
|
5
|
+
# the Free Software Foundation, either version 3 of the License, or #
|
|
6
|
+
# (at your option) any later version. #
|
|
7
|
+
# #
|
|
8
|
+
# This program is distributed in the hope that it will be useful, #
|
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
11
|
+
# GNU General Public License for more details. #
|
|
12
|
+
# #
|
|
13
|
+
# You should have received a copy of the GNU General Public License #
|
|
14
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>. #
|
|
15
|
+
# #
|
|
16
|
+
###############################################################################
|
|
17
|
+
|
|
18
|
+
__title__ = 'levseq'
|
|
19
|
+
__description__ = 'LevSeq nanopore sequencing'
|
|
20
|
+
__url__ = 'https://github.com/fhalab/levseq/'
|
|
21
|
+
__version__ = '1.5.1'
|
|
22
|
+
__author__ = 'Yueming Long, Ariane Mora, Francesca-Zhoufan Li, Emre Gursoy'
|
|
23
|
+
__author_email__ = 'ylong@caltech.edu'
|
|
24
|
+
__license__ = 'GPL3'
|
|
25
|
+
|
|
26
|
+
from importlib import import_module
|
|
27
|
+
import sys as _sys
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
_LAZY_MODULES = {
|
|
31
|
+
'cmd': 'levseq.cmd',
|
|
32
|
+
'filter_orientation': 'levseq.filter_orientation',
|
|
33
|
+
'globals': 'levseq.globals',
|
|
34
|
+
'interface': 'levseq.interface',
|
|
35
|
+
'run_levseq': 'levseq.run_levseq',
|
|
36
|
+
'simulation': 'levseq.simulation',
|
|
37
|
+
'user': 'levseq.user',
|
|
38
|
+
'utils': 'levseq.utils',
|
|
39
|
+
'variantcaller': 'levseq.variantcaller',
|
|
40
|
+
'visualization': 'levseq.visualization',
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
_LAZY_EXPORTS = {
|
|
44
|
+
'ALL_AAS': 'levseq.variantcaller',
|
|
45
|
+
'AlignIO': 'levseq.visualization',
|
|
46
|
+
'CODONS': 'levseq.globals',
|
|
47
|
+
'CWD': 'levseq.interface',
|
|
48
|
+
'ColumnDataSource': 'levseq.visualization',
|
|
49
|
+
'Counter': 'levseq.visualization',
|
|
50
|
+
'CustomJS': 'levseq.visualization',
|
|
51
|
+
'DEFAULT_TARGETS': 'levseq.globals',
|
|
52
|
+
'Div': 'levseq.visualization',
|
|
53
|
+
'FactorRange': 'levseq.visualization',
|
|
54
|
+
'HoverTool': 'levseq.visualization',
|
|
55
|
+
'Label': 'levseq.visualization',
|
|
56
|
+
'Legend': 'levseq.visualization',
|
|
57
|
+
'LegendItem': 'levseq.visualization',
|
|
58
|
+
'Motif': 'levseq.visualization',
|
|
59
|
+
'MultipleSeqAlignment': 'levseq.visualization',
|
|
60
|
+
'NUC_COLOR_DICT': 'levseq.visualization',
|
|
61
|
+
'PCA': 'levseq.user',
|
|
62
|
+
'Path': 'levseq.variantcaller',
|
|
63
|
+
'Range1d': 'levseq.visualization',
|
|
64
|
+
'RangeSlider': 'levseq.visualization',
|
|
65
|
+
'Rect': 'levseq.visualization',
|
|
66
|
+
'SCORE_MATRIX': 'levseq.globals',
|
|
67
|
+
'SW_ALIGN_PARAMS': 'levseq.globals',
|
|
68
|
+
'SciUtil': 'levseq.user',
|
|
69
|
+
'Seq': 'levseq.filter_orientation',
|
|
70
|
+
'SeqIO': 'levseq.variantcaller',
|
|
71
|
+
'Spacer': 'levseq.visualization',
|
|
72
|
+
'Tap': 'levseq.visualization',
|
|
73
|
+
'TapTool': 'levseq.visualization',
|
|
74
|
+
'Text': 'levseq.visualization',
|
|
75
|
+
'ThreadPool': 'levseq.variantcaller',
|
|
76
|
+
'ThreadPoolExecutor': 'levseq.filter_orientation',
|
|
77
|
+
'VariantCaller': 'levseq.variantcaller',
|
|
78
|
+
'WELL_IDS': 'levseq.visualization',
|
|
79
|
+
'aa1': 'levseq.variantcaller',
|
|
80
|
+
'aa_to_index': 'levseq.user',
|
|
81
|
+
'aggregate_conservation': 'levseq.visualization',
|
|
82
|
+
'aggregate_gray_blocks': 'levseq.visualization',
|
|
83
|
+
'alignment_from_cigar': 'levseq.variantcaller',
|
|
84
|
+
'amino_acid_to_codon': 'levseq.variantcaller',
|
|
85
|
+
'amino_acids': 'levseq.user',
|
|
86
|
+
'annotations': 'levseq.visualization',
|
|
87
|
+
'argparse': 'levseq.interface',
|
|
88
|
+
'as_completed': 'levseq.filter_orientation',
|
|
89
|
+
'basecall_model': 'levseq.interface',
|
|
90
|
+
'binomtest': 'levseq.variantcaller',
|
|
91
|
+
'build_cli_parser': 'levseq.interface',
|
|
92
|
+
'build_kmer_set': 'levseq.filter_orientation',
|
|
93
|
+
'calc_mutation_significance_for_position_in_well': 'levseq.variantcaller',
|
|
94
|
+
'calculate_mutation_significance_across_well': 'levseq.variantcaller',
|
|
95
|
+
'cc': 'levseq.visualization',
|
|
96
|
+
'check_demultiplexing': 'levseq.variantcaller',
|
|
97
|
+
'check_variants': 'levseq.simulation',
|
|
98
|
+
'column': 'levseq.visualization',
|
|
99
|
+
'combine_pvalues': 'levseq.variantcaller',
|
|
100
|
+
'combine_seq_func_data': 'levseq.interface',
|
|
101
|
+
'convert_variant_df_to_seqs': 'levseq.user',
|
|
102
|
+
'count_kmer_hits': 'levseq.filter_orientation',
|
|
103
|
+
'deepcopy': 'levseq.variantcaller',
|
|
104
|
+
'defaultdict': 'levseq.variantcaller',
|
|
105
|
+
'execute_LevSeq': 'levseq.interface',
|
|
106
|
+
'figure': 'levseq.visualization',
|
|
107
|
+
'filter_demultiplexed_folder': 'levseq.filter_orientation',
|
|
108
|
+
'filter_single_file': 'levseq.filter_orientation',
|
|
109
|
+
'generate_epcr_library': 'levseq.simulation',
|
|
110
|
+
'generate_platemaps': 'levseq.visualization',
|
|
111
|
+
'generate_ssm_library': 'levseq.simulation',
|
|
112
|
+
'get_colour': 'levseq.user',
|
|
113
|
+
'get_cons': 'levseq.visualization',
|
|
114
|
+
'get_cons_diff_colorNseq': 'levseq.visualization',
|
|
115
|
+
'get_cons_seq': 'levseq.visualization',
|
|
116
|
+
'get_dummy_plate_df': 'levseq.variantcaller',
|
|
117
|
+
'get_mut': 'levseq.variantcaller',
|
|
118
|
+
'get_reads_for_well': 'levseq.variantcaller',
|
|
119
|
+
'get_sequence_colors': 'levseq.visualization',
|
|
120
|
+
'get_sequence_diff_colorNseq': 'levseq.visualization',
|
|
121
|
+
'get_template_df': 'levseq.variantcaller',
|
|
122
|
+
'get_variant_label_for_well': 'levseq.variantcaller',
|
|
123
|
+
'get_well_ids': 'levseq.visualization',
|
|
124
|
+
'glob': 'levseq.variantcaller',
|
|
125
|
+
'gridplot': 'levseq.visualization',
|
|
126
|
+
'gzip': 'levseq.filter_orientation',
|
|
127
|
+
'hv': 'levseq.visualization',
|
|
128
|
+
'init_notebook_env': 'levseq.visualization',
|
|
129
|
+
'insert_nt': 'levseq.simulation',
|
|
130
|
+
'iter_fastq_records': 'levseq.filter_orientation',
|
|
131
|
+
'logger': 'levseq.variantcaller',
|
|
132
|
+
'logging': 'levseq.variantcaller',
|
|
133
|
+
'main': 'levseq.cmd',
|
|
134
|
+
'make_epcr_de_experiment': 'levseq.simulation',
|
|
135
|
+
'make_experiment': 'levseq.simulation',
|
|
136
|
+
'make_mixed_well_epcr_de_experiment': 'levseq.simulation',
|
|
137
|
+
'make_msa': 'levseq.user',
|
|
138
|
+
'make_oligopool_plates': 'levseq.visualization',
|
|
139
|
+
'make_pca': 'levseq.user',
|
|
140
|
+
'make_row_from_read_pileup_across_well': 'levseq.variantcaller',
|
|
141
|
+
'make_ssm_de_experiment': 'levseq.simulation',
|
|
142
|
+
'make_well_df_for_saving': 'levseq.simulation',
|
|
143
|
+
'make_well_df_from_reads': 'levseq.variantcaller',
|
|
144
|
+
'match_color': 'levseq.visualization',
|
|
145
|
+
'math': 'levseq.variantcaller',
|
|
146
|
+
'min_depth': 'levseq.interface',
|
|
147
|
+
'mpl': 'levseq.visualization',
|
|
148
|
+
'multipletests': 'levseq.variantcaller',
|
|
149
|
+
'mutate_sequence': 'levseq.simulation',
|
|
150
|
+
'np': 'levseq.variantcaller',
|
|
151
|
+
'ns': 'levseq.visualization',
|
|
152
|
+
'one_hot_encode': 'levseq.user',
|
|
153
|
+
'os': 'levseq.variantcaller',
|
|
154
|
+
'output_file': 'levseq.visualization',
|
|
155
|
+
'output_notebook': 'levseq.visualization',
|
|
156
|
+
'padding_end': 'levseq.interface',
|
|
157
|
+
'padding_start': 'levseq.interface',
|
|
158
|
+
'pd': 'levseq.variantcaller',
|
|
159
|
+
'plot_empty': 'levseq.visualization',
|
|
160
|
+
'plot_seaborn_heatmap': 'levseq.visualization',
|
|
161
|
+
'plot_sequence_alignment': 'levseq.visualization',
|
|
162
|
+
'plt': 'levseq.visualization',
|
|
163
|
+
'pn': 'levseq.visualization',
|
|
164
|
+
'postprocess_variant_df': 'levseq.variantcaller',
|
|
165
|
+
'pysam': 'levseq.variantcaller',
|
|
166
|
+
'random': 'levseq.variantcaller',
|
|
167
|
+
're': 'levseq.variantcaller',
|
|
168
|
+
'row': 'levseq.visualization',
|
|
169
|
+
'run_LevSeq': 'levseq.interface',
|
|
170
|
+
'sample_kmer_positions': 'levseq.filter_orientation',
|
|
171
|
+
'save': 'levseq.visualization',
|
|
172
|
+
'show': 'levseq.visualization',
|
|
173
|
+
'shutil': 'levseq.variantcaller',
|
|
174
|
+
'sns': 'levseq.visualization',
|
|
175
|
+
'subprocess': 'levseq.variantcaller',
|
|
176
|
+
'sys': 'levseq.visualization',
|
|
177
|
+
'threshold': 'levseq.interface',
|
|
178
|
+
'tqdm': 'levseq.variantcaller',
|
|
179
|
+
'translate': 'levseq.variantcaller',
|
|
180
|
+
'u': 'levseq.user',
|
|
181
|
+
'warnings': 'levseq.variantcaller',
|
|
182
|
+
'well2nb': 'levseq.visualization',
|
|
183
|
+
'write_msa_for_df': 'levseq.simulation',
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
__all__ = [
|
|
187
|
+
'__title__',
|
|
188
|
+
'__description__',
|
|
189
|
+
'__url__',
|
|
190
|
+
'__version__',
|
|
191
|
+
'__author__',
|
|
192
|
+
'__author_email__',
|
|
193
|
+
'__license__',
|
|
194
|
+
*sorted(_LAZY_MODULES),
|
|
195
|
+
*sorted(_LAZY_EXPORTS),
|
|
196
|
+
]
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def __getattr__(name):
|
|
200
|
+
if name in _LAZY_MODULES:
|
|
201
|
+
module = import_module(_LAZY_MODULES[name])
|
|
202
|
+
vars(_sys.modules[__name__])[name] = module
|
|
203
|
+
return module
|
|
204
|
+
|
|
205
|
+
if name in _LAZY_EXPORTS:
|
|
206
|
+
module = import_module(_LAZY_EXPORTS[name])
|
|
207
|
+
value = getattr(module, name)
|
|
208
|
+
vars(_sys.modules[__name__])[name] = value
|
|
209
|
+
return value
|
|
210
|
+
|
|
211
|
+
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def __dir__():
|
|
215
|
+
return sorted(set(vars(_sys.modules[__name__])) | set(__all__))
|
|
@@ -15,56 +15,24 @@
|
|
|
15
15
|
# #
|
|
16
16
|
###############################################################################
|
|
17
17
|
|
|
18
|
-
# Import MinION objects
|
|
19
|
-
from levseq import *
|
|
20
|
-
from levseq.filter_orientation import filter_demultiplexed_folder
|
|
21
|
-
# Import external packages
|
|
22
18
|
import logging
|
|
23
|
-
from pathlib import Path
|
|
24
|
-
import numpy as np
|
|
25
|
-
import pandas as pd
|
|
26
|
-
from importlib import resources
|
|
27
|
-
import subprocess
|
|
28
|
-
from Bio import SeqIO
|
|
29
|
-
import tqdm
|
|
30
|
-
import platform
|
|
31
|
-
import subprocess
|
|
32
19
|
import os
|
|
20
|
+
import platform
|
|
33
21
|
import re
|
|
34
|
-
import gzip
|
|
35
|
-
import shutil
|
|
36
|
-
|
|
37
|
-
import panel as pn
|
|
38
|
-
import holoviews as hv
|
|
39
|
-
from holoviews.streams import Tap
|
|
40
|
-
import matplotlib
|
|
41
|
-
|
|
42
|
-
import os
|
|
43
|
-
import logging
|
|
44
|
-
import pandas as pd
|
|
45
|
-
from pathlib import Path
|
|
46
22
|
import shutil
|
|
47
23
|
import subprocess
|
|
48
|
-
from
|
|
49
|
-
import
|
|
50
|
-
import numpy as np
|
|
51
|
-
import tqdm
|
|
52
|
-
|
|
53
|
-
import os
|
|
54
|
-
import logging
|
|
55
|
-
import pandas as pd
|
|
24
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
25
|
+
from importlib import resources
|
|
56
26
|
from pathlib import Path
|
|
57
|
-
|
|
58
|
-
import subprocess
|
|
59
|
-
from Bio import SeqIO
|
|
60
|
-
import platform
|
|
27
|
+
|
|
61
28
|
import numpy as np
|
|
29
|
+
import pandas as pd
|
|
62
30
|
import tqdm
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
from
|
|
66
|
-
from
|
|
67
|
-
from
|
|
31
|
+
from Bio import SeqIO
|
|
32
|
+
|
|
33
|
+
from levseq.filter_orientation import filter_demultiplexed_folder
|
|
34
|
+
from levseq.utils import translate
|
|
35
|
+
from levseq.variantcaller import VariantCaller
|
|
68
36
|
|
|
69
37
|
# Utility function to configure logging
|
|
70
38
|
def configure_logging(result_folder, cl_args):
|
|
@@ -472,6 +440,8 @@ def save_platemap_to_file(heatmaps, outputdir, name, show_msa):
|
|
|
472
440
|
if show_msa:
|
|
473
441
|
heatmaps.save(file_path + "_msa.html", embed=True)
|
|
474
442
|
else:
|
|
443
|
+
import holoviews as hv
|
|
444
|
+
|
|
475
445
|
hv.renderer("bokeh").save(heatmaps, file_path)
|
|
476
446
|
|
|
477
447
|
def save_csv(df, outputdir, name):
|
|
@@ -721,6 +691,8 @@ def run_LevSeq(cl_args, tqdm_fn=tqdm.tqdm):
|
|
|
721
691
|
processed_csv = os.path.join(result_folder, "visualization_partial.csv")
|
|
722
692
|
df_vis.to_csv(processed_csv, index=False)
|
|
723
693
|
if cl_args["oligopool"]:
|
|
694
|
+
from levseq.visualization import make_oligopool_plates
|
|
695
|
+
|
|
724
696
|
make_oligopool_plates(df_vis, result_folder=result_folder, save_files=True)
|
|
725
697
|
except Exception as e:
|
|
726
698
|
processed_csv = os.path.join(result_folder, "visualization_partial.csv")
|
|
@@ -730,6 +702,8 @@ def run_LevSeq(cl_args, tqdm_fn=tqdm.tqdm):
|
|
|
730
702
|
raise
|
|
731
703
|
|
|
732
704
|
try:
|
|
705
|
+
from levseq.visualization import generate_platemaps
|
|
706
|
+
|
|
733
707
|
layout = generate_platemaps(
|
|
734
708
|
max_combo_data=df_vis,
|
|
735
709
|
result_folder=result_folder,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: levseq
|
|
3
|
-
Version: 1.5
|
|
3
|
+
Version: 1.5.1
|
|
4
4
|
Home-page: https://github.com/fhalab/levseq/
|
|
5
5
|
Author: Yueming Long, Ariane Mora, Francesca-Zhoufan Li, Emre Gursoy
|
|
6
6
|
Author-email: ylong@caltech.edu
|
|
@@ -74,43 +74,78 @@ Figure 1: Overview of the LevSeq variant sequencing workflow using Nanopore tech
|
|
|
74
74
|
- **Reversed workflow architecture**: LevSeq 2.0 will perform alignment first, then demultiplexing (rather than the current demultiplexing-first approach), resolving issues with forward and reverse read handling
|
|
75
75
|
- **Improved accuracy**: These changes will provide more robust demultiplexing and variant calling across diverse experimental conditions
|
|
76
76
|
|
|
77
|
-
**
|
|
77
|
+
**If you are planning to order barcoded primers now, or need detailed help with troubleshooting or barcode design, please reach out at [lyming2021@gmail.com](mailto:lyming2021@gmail.com).**
|
|
78
78
|
|
|
79
79
|
## Notes
|
|
80
80
|
|
|
81
|
-
LevSeq was designed for epPCR and SSM experiments
|
|
81
|
+
LevSeq was designed for epPCR and SSM experiments. We are also extending it to support additional enzyme engineering designs. Current features under development include:
|
|
82
82
|
|
|
83
|
-
1. Insertion handling (see version 4.1.3)
|
|
84
|
-
2. Gene calling
|
|
83
|
+
1. Insertion handling (see version 4.1.3). Thanks to Brian Zhong for contributions to this section.
|
|
84
|
+
2. Gene calling for experiments with different genes, using the `--oligopool` flag.
|
|
85
85
|
|
|
86
|
-
If you notice
|
|
86
|
+
If you notice issues with new features or have adapted LevSeq for your own use case, community contributions are welcome. Please submit an issue or pull request and we will aim to incorporate the changes.
|
|
87
87
|
|
|
88
88
|
Performance update: demultiplexing now runs in parallel batches of 8 plates and input FASTQs are staged once per run, improving throughput on multi-core systems.
|
|
89
89
|
|
|
90
|
+
Recent repository polish:
|
|
91
|
+
- Faster imports: `import levseq` no longer initializes visualization libraries unless they are needed.
|
|
92
|
+
- Cleaner run startup: plotting dependencies are loaded only when platemaps are generated.
|
|
93
|
+
- Packaging cleanup: bundled barcode files and demultiplex binaries are now declared through package discovery.
|
|
94
|
+
- Git hygiene: local `node_modules/` folders are ignored.
|
|
95
|
+
|
|
90
96
|
## Quick Start
|
|
91
97
|
|
|
92
|
-
Note the current stable version is: `1.5`, the latest version is `1.5`.
|
|
98
|
+
Note: the current stable version is `1.5.1`, and the latest version is `1.5.1`.
|
|
93
99
|
|
|
94
100
|
Stable releases are available via Docker and pip. To use the latest version, please clone the repo and install locally (see *Local development or install of latest version* below).
|
|
95
101
|
|
|
102
|
+
### How to Run LevSeq
|
|
103
|
+
|
|
104
|
+
Before running LevSeq, prepare:
|
|
105
|
+
- A folder containing Oxford Nanopore basecalled FASTQ files, usually from a `fastq_pass` directory.
|
|
106
|
+
- A reference CSV file with the columns `barcode_plate`, `name`, and `refseq` (see [Reference File Format](#reference-file-format-refcsv)).
|
|
107
|
+
- A run name, which LevSeq uses as the output folder name.
|
|
108
|
+
|
|
109
|
+
The basic command format is:
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
levseq <run_name> <path_to_fastq_folder> <path_to_ref_csv>
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
Example:
|
|
116
|
+
|
|
117
|
+
```bash
|
|
118
|
+
levseq my_experiment /path/to/fastq_pass /path/to/ref.csv
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
LevSeq writes results to an output folder named after `<run_name>`. Key outputs include `variants.csv`, `visualization_partial.csv`, result CSV files, logs, and interactive platemap HTML files.
|
|
122
|
+
|
|
123
|
+
Common run options:
|
|
124
|
+
- Use `--output /path/to/output` to choose where the run folder is created.
|
|
125
|
+
- Use `--skip_demultiplexing` if reads have already been demultiplexed.
|
|
126
|
+
- Use `--skip_variantcalling` if you only want to run demultiplexing.
|
|
127
|
+
- Use `--oligopool` for experiments with multiple genes or references per barcode plate.
|
|
128
|
+
- Use `--show_msa` to include multiple sequence alignment views in the output.
|
|
129
|
+
|
|
96
130
|
### Docker Installation (Recommended)
|
|
97
131
|
|
|
98
132
|
1. Install Docker: [https://docs.docker.com/engine/install/](https://docs.docker.com/engine/install/)
|
|
99
133
|
2. Pull the appropriate image:
|
|
100
134
|
```bash
|
|
101
135
|
# For Linux/Windows x86 systems:
|
|
102
|
-
docker pull yueminglong/levseq:levseq-1.
|
|
136
|
+
docker pull yueminglong/levseq:levseq-1.5.1-x86
|
|
103
137
|
|
|
104
138
|
# For Mac M-series chips (M1, M2, M3, M4):
|
|
105
|
-
docker pull yueminglong/levseq:levseq-1.
|
|
139
|
+
docker pull yueminglong/levseq:levseq-1.5.1-arm64
|
|
106
140
|
```
|
|
107
141
|
3. Run LevSeq:
|
|
108
142
|
```bash
|
|
109
|
-
docker run --rm -v "/full/path/to/data:/levseq_results" yueminglong/levseq:levseq-1.
|
|
143
|
+
docker run --rm -v "/full/path/to/data:/levseq_results" yueminglong/levseq:levseq-1.5.1-arm64 my_experiment levseq_results/ levseq_results/ref.csv
|
|
110
144
|
```
|
|
145
|
+
Replace `levseq-1.5.1-arm64` with the image tag that matches your platform and release.
|
|
111
146
|
4. Connect function data to your sequence data
|
|
112
147
|
```bash
|
|
113
|
-
docker run --rm -v "/full/path/to/data:/levseq_results" yueminglong/levseq:levseq-1.
|
|
148
|
+
docker run --rm -v "/full/path/to/data:/levseq_results" yueminglong/levseq:levseq-1.5.1-arm64 my_experiment levseq_results/ levseq_results/ref.csv --fitness_files "levseq_results/20250712_epPCR_Q06714_37.csv,levseq_results/20250712_epPCR_Q06714_39.csv,levseq_results/20250712_epPCR_Q06714_40.csv" --smiles 'O=P(OC1=CC=CC=C1)(OC2=CC=CC=C2)OC3=CC=CC=C3>>O=P(O)(OC4=CC=CC=C4)OC5=CC=CC=C5' --compound dPPi --variant_df "levseq_results/visualization_partial.csv"
|
|
114
149
|
```
|
|
115
150
|
### Pip Installation (Mac/Linux only)
|
|
116
151
|
|
|
@@ -145,11 +180,11 @@ brew install gcc@13 gcc@14
|
|
|
145
180
|
levseq my_experiment /path/to/data/ /path/to/ref.csv --fitness_files "LCMS_file_{barcode1}.csv,LCMS_file_{barcode2}.csv" --smiles 'reaction_smiles_string' --compound "name_of_compound_in_LCMS_file" --variant_df "visualization_partial.csv"
|
|
146
181
|
```
|
|
147
182
|
|
|
148
|
-
|
|
183
|
+
For function data, LevSeq currently expects LCMS files with these columns:
|
|
149
184
|
- `Sample Vial Number` (corresponding to the well that the sample was from).
|
|
150
185
|
- `Area` (which becomes the fitness value).
|
|
151
186
|
- `Compound Name` (the name of the compound; rows are filtered to the compound passed via the `--compound` parameter).
|
|
152
|
-
- The
|
|
187
|
+
- The final `_X.csv` suffix should contain the barcode number used to match that sample to the correct plate. For example, if plate 2 used barcode 33, the fitness file should end in `_33.csv`, such as `some_fitness_for_plate_2_33.csv`.
|
|
153
188
|
|
|
154
189
|
|
|
155
190
|
## Data and Visualization
|
|
@@ -188,6 +223,10 @@ For oligopool experiments (multiple proteins per plate), use:
|
|
|
188
223
|
- `--output`: Custom save location (defaults to current directory)
|
|
189
224
|
- `--show_msa`: Show multiple sequence alignment for each well
|
|
190
225
|
- `--oligopool`: Process data as oligopool experiment
|
|
226
|
+
- `--fitness_files`: Comma-separated LCMS or function-data CSV files to join with sequence results
|
|
227
|
+
- `--smiles`: Reaction SMILES string used when joining function data
|
|
228
|
+
- `--compound`: Compound name to filter in the function-data files
|
|
229
|
+
- `--variant_df`: LevSeq variant dataframe to join with function data, usually `visualization_partial.csv`
|
|
191
230
|
|
|
192
231
|
## Step-by-Step Tutorial
|
|
193
232
|
|
|
@@ -198,7 +237,7 @@ For oligopool experiments (multiple proteins per plate), use:
|
|
|
198
237
|
2. **Run LevSeq**:
|
|
199
238
|
```bash
|
|
200
239
|
# Via Docker
|
|
201
|
-
docker run --rm -v "/path/to/data:/levseq_results" yueminglong/levseq:levseq-1.
|
|
240
|
+
docker run --rm -v "/path/to/data:/levseq_results" yueminglong/levseq:levseq-1.5.1-arm64 my_experiment levseq_results/ levseq_results/ref.csv
|
|
202
241
|
|
|
203
242
|
# Via pip
|
|
204
243
|
levseq my_experiment /path/to/data/ /path/to/ref.csv
|
|
@@ -229,7 +268,7 @@ conda create --name levseq python=3.10
|
|
|
229
268
|
git clone git@github.com:fhalab/LevSeq.git
|
|
230
269
|
cd LevSeq
|
|
231
270
|
python setup.py sdist bdist_wheel
|
|
232
|
-
pip install dist/levseq-1.
|
|
271
|
+
pip install dist/levseq-1.5.1.tar.gz
|
|
233
272
|
```
|
|
234
273
|
|
|
235
274
|
## Citing LevSeq
|
|
@@ -248,4 +287,4 @@ If you find LevSeq useful, please cite our paper:
|
|
|
248
287
|
|
|
249
288
|
## Contact
|
|
250
289
|
|
|
251
|
-
|
|
290
|
+
For detailed questions, troubleshooting, barcode design support, or feature requests, email [lyming2021@gmail.com](mailto:lyming2021@gmail.com). Reproducible bugs and public feature discussions are also welcome as GitHub issues.
|
|
@@ -77,13 +77,13 @@ setup(name='levseq',
|
|
|
77
77
|
'Topic :: Scientific/Engineering :: Bio-Informatics',
|
|
78
78
|
],
|
|
79
79
|
keywords=['Nanopore', 'ONT', 'evSeq'],
|
|
80
|
-
packages=['
|
|
80
|
+
packages=find_packages(exclude=['tests', 'tests.*']),
|
|
81
81
|
include_package_data=True,
|
|
82
82
|
package_data={
|
|
83
83
|
'levseq.barcoding': [
|
|
84
84
|
'minion_barcodes.fasta',
|
|
85
85
|
'demultiplex',
|
|
86
|
-
'demultiplex-arm64'
|
|
86
|
+
'demultiplex-arm64',
|
|
87
87
|
'demultiplex-x86',
|
|
88
88
|
],
|
|
89
89
|
},
|
levseq-1.5/levseq/__init__.py
DELETED
|
@@ -1,34 +0,0 @@
|
|
|
1
|
-
###############################################################################
|
|
2
|
-
# #
|
|
3
|
-
# This program is free software: you can redistribute it and/or modify #
|
|
4
|
-
# it under the terms of the GNU General Public License as published by #
|
|
5
|
-
# the Free Software Foundation, either version 3 of the License, or #
|
|
6
|
-
# (at your option) any later version. #
|
|
7
|
-
# #
|
|
8
|
-
# This program is distributed in the hope that it will be useful, #
|
|
9
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
|
10
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
|
11
|
-
# GNU General Public License for more details. #
|
|
12
|
-
# #
|
|
13
|
-
# You should have received a copy of the GNU General Public License #
|
|
14
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>. #
|
|
15
|
-
# #
|
|
16
|
-
###############################################################################
|
|
17
|
-
|
|
18
|
-
__title__ = 'levseq'
|
|
19
|
-
__description__ = 'LevSeq nanopore sequencing'
|
|
20
|
-
__url__ = 'https://github.com/fhalab/levseq/'
|
|
21
|
-
__version__ = '1.5'
|
|
22
|
-
__author__ = 'Yueming Long, Ariane Mora, Francesca-Zhoufan Li, Emre Gursoy'
|
|
23
|
-
__author_email__ = 'ylong@caltech.edu'
|
|
24
|
-
__license__ = 'GPL3'
|
|
25
|
-
|
|
26
|
-
from levseq.globals import *
|
|
27
|
-
from levseq.variantcaller import *
|
|
28
|
-
from levseq.visualization import *
|
|
29
|
-
from levseq.interface import *
|
|
30
|
-
from levseq.cmd import *
|
|
31
|
-
from levseq.utils import *
|
|
32
|
-
from levseq.simulation import *
|
|
33
|
-
from levseq.user import *
|
|
34
|
-
from levseq.filter_orientation import *
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|