levseq 1.2.5__tar.gz → 1.2.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {levseq-1.2.5/levseq.egg-info → levseq-1.2.7}/PKG-INFO +44 -5
- {levseq-1.2.5 → levseq-1.2.7}/README.md +44 -5
- {levseq-1.2.5 → levseq-1.2.7}/levseq/__init__.py +1 -1
- levseq-1.2.7/levseq/coordinates.py +76 -0
- {levseq-1.2.5 → levseq-1.2.7}/levseq/run_levseq.py +44 -30
- {levseq-1.2.5 → levseq-1.2.7}/levseq/seqfit.py +536 -117
- {levseq-1.2.5 → levseq-1.2.7}/levseq/variantcaller.py +5 -4
- {levseq-1.2.5 → levseq-1.2.7}/levseq/visualization.py +4 -4
- {levseq-1.2.5 → levseq-1.2.7/levseq.egg-info}/PKG-INFO +44 -5
- {levseq-1.2.5 → levseq-1.2.7}/levseq.egg-info/SOURCES.txt +1 -0
- {levseq-1.2.5 → levseq-1.2.7}/LICENSE +0 -0
- {levseq-1.2.5 → levseq-1.2.7}/MANIFEST.in +0 -0
- {levseq-1.2.5 → levseq-1.2.7}/levseq/IO_processor.py +0 -0
- {levseq-1.2.5 → levseq-1.2.7}/levseq/barcoding/__init__.py +0 -0
- {levseq-1.2.5 → levseq-1.2.7}/levseq/barcoding/demultiplex +0 -0
- {levseq-1.2.5 → levseq-1.2.7}/levseq/barcoding/demultiplex-arm64 +0 -0
- {levseq-1.2.5 → levseq-1.2.7}/levseq/barcoding/demultiplex-x86 +0 -0
- {levseq-1.2.5 → levseq-1.2.7}/levseq/barcoding/minion_barcodes.fasta +0 -0
- {levseq-1.2.5 → levseq-1.2.7}/levseq/basecaller.py +0 -0
- {levseq-1.2.5 → levseq-1.2.7}/levseq/cmd.py +0 -0
- {levseq-1.2.5 → levseq-1.2.7}/levseq/globals.py +0 -0
- {levseq-1.2.5 → levseq-1.2.7}/levseq/interface.py +0 -0
- {levseq-1.2.5 → levseq-1.2.7}/levseq/parser.py +0 -0
- {levseq-1.2.5 → levseq-1.2.7}/levseq/screen.py +0 -0
- {levseq-1.2.5 → levseq-1.2.7}/levseq/simulation.py +0 -0
- {levseq-1.2.5 → levseq-1.2.7}/levseq/user.py +0 -0
- {levseq-1.2.5 → levseq-1.2.7}/levseq/utils.py +0 -0
- {levseq-1.2.5 → levseq-1.2.7}/levseq.egg-info/dependency_links.txt +0 -0
- {levseq-1.2.5 → levseq-1.2.7}/levseq.egg-info/entry_points.txt +0 -0
- {levseq-1.2.5 → levseq-1.2.7}/levseq.egg-info/requires.txt +0 -0
- {levseq-1.2.5 → levseq-1.2.7}/levseq.egg-info/top_level.txt +0 -0
- {levseq-1.2.5 → levseq-1.2.7}/setup.cfg +0 -0
- {levseq-1.2.5 → levseq-1.2.7}/setup.py +0 -0
- {levseq-1.2.5 → levseq-1.2.7}/tests/test_demultiplex_docker.py +0 -0
- {levseq-1.2.5 → levseq-1.2.7}/tests/test_opligopools.py +0 -0
- {levseq-1.2.5 → levseq-1.2.7}/tests/test_seqfitvis.py +0 -0
- {levseq-1.2.5 → levseq-1.2.7}/tests/test_seqs.py +0 -0
- {levseq-1.2.5 → levseq-1.2.7}/tests/test_statistics.py +0 -0
- {levseq-1.2.5 → levseq-1.2.7}/tests/test_variant_calling.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: levseq
|
|
3
|
-
Version: 1.2.5
|
|
3
|
+
Version: 1.2.7
|
|
4
4
|
Home-page: https://github.com/fhalab/levseq/
|
|
5
5
|
Author: Yueming Long, Emreay Gursoy, Ariane Mora, Francesca-Zhoufan Li
|
|
6
6
|
Author-email: ylong@caltech.edu
|
|
@@ -54,7 +54,7 @@ Figure 1: Overview of the LevSeq variant sequencing workflow using Nanopore tech
|
|
|
54
54
|
|
|
55
55
|
|
|
56
56
|
- Data to reproduce the results and to test are available on [Zenodo](https://doi.org/10.5281/zenodo.13694463)
|
|
57
|
-
- A dockerized website and database for labs to locally host and visualize their data: website is available [here](https://
|
|
57
|
+
- A dockerized website and database for labs to locally host and visualize their data: website is available [here](https://levseqdb.streamlit.app/) and code to host locally [here](https://github.com/fhalab/LevSeq_db)
|
|
58
58
|
|
|
59
59
|
## Setup
|
|
60
60
|
|
|
@@ -80,7 +80,7 @@ and `minimap2` installed on your path. However, if you have issues we recommend
|
|
|
80
80
|
We recommend using terminal and a conda environment for installation:
|
|
81
81
|
|
|
82
82
|
```
|
|
83
|
-
conda create --name levseq python=3.
|
|
83
|
+
conda create --name levseq python=3.12 -y
|
|
84
84
|
```
|
|
85
85
|
|
|
86
86
|
```
|
|
@@ -93,7 +93,7 @@ conda activate levseq
|
|
|
93
93
|
```
|
|
94
94
|
conda install -c bioconda -c conda-forge samtools
|
|
95
95
|
```
|
|
96
|
-
|
|
96
|
+
|
|
97
97
|
|
|
98
98
|
2. Minimap2: https://github.com/lh3/minimap2
|
|
99
99
|
|
|
@@ -110,11 +110,46 @@ operating system (https://docs.docker.com/engine/install/).
|
|
|
110
110
|
```
|
|
111
111
|
levseq <name of the run you can make this whatever> <location to data folder> <location of reference csv file>
|
|
112
112
|
```
|
|
113
|
+
|
|
113
114
|
#### Run via docker
|
|
115
|
+
If using a Linux system:
|
|
116
|
+
```
|
|
117
|
+
docker pull yueminglong/levseq:levseq-1.2.5-x86
|
|
114
118
|
```
|
|
115
|
-
|
|
119
|
+
If using a Mac with an M-series chip (image tested on M1, M3, and M4):
|
|
120
|
+
```
|
|
121
|
+
docker pull yueminglong/levseq:levseq-1.2.5-arm64
|
|
122
|
+
```
|
|
123
|
+
|
|
116
124
|
```
|
|
125
|
+
docker run --rm -v "$(pwd):/levseq_results" yueminglong/levseq:levseq-1.2.5-<architecture> <name> <location to data folder> <location of reference csv file>
|
|
126
|
+
```
|
|
127
|
+
Explanation:
|
|
128
|
+
|
|
129
|
+
--rm: Automatically removes the container after the command finishes.
|
|
130
|
+
|
|
131
|
+
-v "$(pwd):/levseq\_results": Mounts the current directory ($(pwd)) to /levseq\_results inside the container, ensuring the results are saved to your current directory.
|
|
132
|
+
|
|
133
|
+
yueminglong/levseq:levseq-1.2.5-\<architecture\>: Specifies the Docker image to run. Replace \<architecture\> with the appropriate platform (e.g., x86).
|
|
134
|
+
|
|
135
|
+
\<name\>: The name or identifier for the analysis.
|
|
136
|
+
|
|
137
|
+
\<location to data folder\>: Path to the folder containing input data.
|
|
138
|
+
|
|
139
|
+
\<location of reference csv file\>: Path to the reference .csv file.
|
|
140
|
+
|
|
141
|
+
Important Notes:
|
|
142
|
+
|
|
143
|
+
If the current directory is mounted to the container (via -v "$(pwd):/levseq\_results"), the basecalled result in FASTQ format and the ref.csv file must be located in the current directory.
|
|
144
|
+
|
|
145
|
+
If these files are not present in the current directory, they will not be processed by the tool.
|
|
146
|
+
|
|
147
|
+
Output:
|
|
148
|
+
|
|
149
|
+
The results of the analysis will be saved to your current working directory.
|
|
150
|
+
|
|
117
151
|
See the [manuscript notebook](https://github.com/fhalab/LevSeq/blob/main/manuscript/notebooks/epPCR_10plates.ipynb) for an example.
|
|
152
|
+
*Note: if using Docker, the final HTML and CSV output will be saved in the directory you are running from instead of in the Platemaps or Results subfolder.*
|
|
118
153
|
|
|
119
154
|
#### Required Arguments
|
|
120
155
|
1. Name of the experiment, this will be the name of the output folder
|
|
@@ -137,3 +172,7 @@ For more details or trouble shooting please look at our [computational_protocols
|
|
|
137
172
|
#### Citing
|
|
138
173
|
|
|
139
174
|
If you have found LevSeq useful, please cite our [paper](https://doi.org/10.1101/2024.09.04.611255).
|
|
175
|
+
|
|
176
|
+
#### Contact
|
|
177
|
+
|
|
178
|
+
Leave a feature request in the issues or reach us via [email](mailto:levseqdb@gmail.com).
|
|
@@ -7,7 +7,7 @@ Figure 1: Overview of the LevSeq variant sequencing workflow using Nanopore tech
|
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
- Data to reproduce the results and to test are available on [Zenodo](https://doi.org/10.5281/zenodo.13694463)
|
|
10
|
-
- A dockerized website and database for labs to locally host and visualize their data: website is available [here](https://
|
|
10
|
+
- A dockerized website and database for labs to locally host and visualize their data: website is available [here](https://levseqdb.streamlit.app/) and code to host locally [here](https://github.com/fhalab/LevSeq_db)
|
|
11
11
|
|
|
12
12
|
## Setup
|
|
13
13
|
|
|
@@ -33,7 +33,7 @@ and `minimap2` installed on your path. However, if you have issues we recommend
|
|
|
33
33
|
We recommend using terminal and a conda environment for installation:
|
|
34
34
|
|
|
35
35
|
```
|
|
36
|
-
conda create --name levseq python=3.
|
|
36
|
+
conda create --name levseq python=3.12 -y
|
|
37
37
|
```
|
|
38
38
|
|
|
39
39
|
```
|
|
@@ -46,7 +46,7 @@ conda activate levseq
|
|
|
46
46
|
```
|
|
47
47
|
conda install -c bioconda -c conda-forge samtools
|
|
48
48
|
```
|
|
49
|
-
|
|
49
|
+
|
|
50
50
|
|
|
51
51
|
2. Minimap2: https://github.com/lh3/minimap2
|
|
52
52
|
|
|
@@ -63,11 +63,46 @@ operating system (https://docs.docker.com/engine/install/).
|
|
|
63
63
|
```
|
|
64
64
|
levseq <name of the run you can make this whatever> <location to data folder> <location of reference csv file>
|
|
65
65
|
```
|
|
66
|
+
|
|
66
67
|
#### Run via docker
|
|
68
|
+
If using a Linux system:
|
|
69
|
+
```
|
|
70
|
+
docker pull yueminglong/levseq:levseq-1.2.5-x86
|
|
67
71
|
```
|
|
68
|
-
|
|
72
|
+
If using a Mac with an M-series chip (image tested on M1, M3, and M4):
|
|
73
|
+
```
|
|
74
|
+
docker pull yueminglong/levseq:levseq-1.2.5-arm64
|
|
75
|
+
```
|
|
76
|
+
|
|
69
77
|
```
|
|
78
|
+
docker run --rm -v "$(pwd):/levseq_results" yueminglong/levseq:levseq-1.2.5-<architecture> <name> <location to data folder> <location of reference csv file>
|
|
79
|
+
```
|
|
80
|
+
Explanation:
|
|
81
|
+
|
|
82
|
+
--rm: Automatically removes the container after the command finishes.
|
|
83
|
+
|
|
84
|
+
-v "$(pwd):/levseq\_results": Mounts the current directory ($(pwd)) to /levseq\_results inside the container, ensuring the results are saved to your current directory.
|
|
85
|
+
|
|
86
|
+
yueminglong/levseq:levseq-1.2.5-\<architecture\>: Specifies the Docker image to run. Replace \<architecture\> with the appropriate platform (e.g., x86).
|
|
87
|
+
|
|
88
|
+
\<name\>: The name or identifier for the analysis.
|
|
89
|
+
|
|
90
|
+
\<location to data folder\>: Path to the folder containing input data.
|
|
91
|
+
|
|
92
|
+
\<location of reference csv file\>: Path to the reference .csv file.
|
|
93
|
+
|
|
94
|
+
Important Notes:
|
|
95
|
+
|
|
96
|
+
If the current directory is mounted to the container (via -v "$(pwd):/levseq\_results"), the basecalled result in FASTQ format and the ref.csv file must be located in the current directory.
|
|
97
|
+
|
|
98
|
+
If these files are not present in the current directory, they will not be processed by the tool.
|
|
99
|
+
|
|
100
|
+
Output:
|
|
101
|
+
|
|
102
|
+
The results of the analysis will be saved to your current working directory.
|
|
103
|
+
|
|
70
104
|
See the [manuscript notebook](https://github.com/fhalab/LevSeq/blob/main/manuscript/notebooks/epPCR_10plates.ipynb) for an example.
|
|
105
|
+
*Note: if using Docker, the final HTML and CSV output will be saved in the directory you are running from instead of in the Platemaps or Results subfolder.*
|
|
71
106
|
|
|
72
107
|
#### Required Arguments
|
|
73
108
|
1. Name of the experiment, this will be the name of the output folder
|
|
@@ -89,4 +124,8 @@ For more details or trouble shooting please look at our [computational_protocols
|
|
|
89
124
|
|
|
90
125
|
#### Citing
|
|
91
126
|
|
|
92
|
-
If you have found LevSeq useful, please cite our [paper](https://doi.org/10.1101/2024.09.04.611255).
|
|
127
|
+
If you have found LevSeq useful, please cite our [paper](https://doi.org/10.1101/2024.09.04.611255).
|
|
128
|
+
|
|
129
|
+
#### Contact
|
|
130
|
+
|
|
131
|
+
Leave a feature request in the issues or reach us via [email](mailto:levseqdb@gmail.com).
|
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
__title__ = 'levseq'
|
|
19
19
|
__description__ = 'LevSeq nanopore sequencing'
|
|
20
20
|
__url__ = 'https://github.com/fhalab/levseq/'
|
|
21
|
-
__version__ = '1.2.
|
|
21
|
+
__version__ = '1.2.7'
|
|
22
22
|
__author__ = 'Yueming Long, Emreay Gursoy, Ariane Mora, Francesca-Zhoufan Li'
|
|
23
23
|
__author_email__ = 'ylong@caltech.edu'
|
|
24
24
|
__license__ = 'GPL3'
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import esm
|
|
2
|
+
import torch
|
|
3
|
+
import pandas as pd
|
|
4
|
+
from sklearn.decomposition import PCA
|
|
5
|
+
import os
|
|
6
|
+
import argparse
|
|
7
|
+
|
|
8
|
+
def preprocess_sequence(sequence: str) -> str:
    """Return the part of an amino acid sequence before its first stop codon.

    A stop codon is written as ``'*'``. Everything from the first ``'*'``
    onwards is discarded; a sequence without ``'*'`` is returned unchanged.

    Args:
        sequence: Amino acid sequence, possibly containing ``'*'``.

    Returns:
        The prefix of ``sequence`` up to (not including) the first ``'*'``.
        This is the empty string when the sequence starts with ``'*'``.
    """
    # partition() scans the string once and yields the whole string as the
    # head when the separator is absent, so no separate membership test is
    # needed.
    head, _, _ = sequence.partition('*')
    return head
|
|
15
|
+
|
|
16
|
+
# Placeholder strings that can appear in `aa_sequence` instead of a real
# protein sequence. 'Deletion' is kept so CSVs written by older releases are
# still filtered correctly.
_NON_SEQUENCE_MARKERS = ("#N.A.#", "#DEL#", "#INS#", "Deletion")


def _embed_sequences(sequences, batch_size=8):
    """Return one mean-pooled ESM-2 embedding per entry of ``sequences``.

    Args:
        sequences: List of non-empty amino acid strings (duplicates allowed).
        batch_size: Number of distinct sequences per forward pass.

    Returns:
        A 2-D numpy array with one row per entry of ``sequences``, in order.
    """
    model, alphabet = esm.pretrained.esm2_t33_650M_UR50D()
    model.eval()  # disable dropout so repeated runs give identical embeddings
    batch_converter = alphabet.get_batch_converter()

    # Identical sequences get identical embeddings, so embed each distinct
    # sequence only once and fan the result back out afterwards.
    unique_sequences = list(dict.fromkeys(sequences))
    pooled = {}

    with torch.no_grad():
        for start in range(0, len(unique_sequences), batch_size):
            chunk = unique_sequences[start:start + batch_size]
            named = [(f"Sequence {start + i}", seq) for i, seq in enumerate(chunk)]
            _, _, batch_tokens = batch_converter(named)
            results = model(batch_tokens, repr_layers=[33])
            token_repr = results["representations"][33]  # top (last) layer

            # Count the real (non-padding) tokens of every row so padding
            # added for shorter sequences never enters the average.
            token_counts = (batch_tokens != alphabet.padding_idx).sum(1)
            for row, seq in enumerate(chunk):
                # Skip the first and last real token (begin/end-of-sequence
                # markers) and average over the residue positions only.
                end = int(token_counts[row]) - 1
                pooled[seq] = token_repr[row, 1:end].mean(0)

    return torch.stack([pooled[seq] for seq in sequences]).numpy()


def process_file(input_file, output_file=None, batch_size=8):
    """Add 2-D PCA coordinates of ESM-2 embeddings to a variant CSV.

    Reads ``input_file``, builds an ``ID`` column from ``Plate`` and ``Well``,
    embeds every usable ``aa_sequence`` with ESM-2, projects the embeddings
    onto two principal components and writes the table back out with
    ``x_coordinate`` / ``y_coordinate`` columns. Rows without a usable
    sequence keep NaN coordinates.

    Args:
        input_file: Path to the input CSV. Must contain the columns
            ``Plate``, ``Well`` and ``aa_sequence``.
        output_file: Destination CSV. Defaults to ``<input>_xy<ext>`` next to
            the input file.
        batch_size: Number of distinct sequences embedded per forward pass.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    # Load the dataset
    data = pd.read_csv(input_file)

    # Remove the "Unnamed: 0" column if it exists
    if 'Unnamed: 0' in data.columns:
        data = data.drop(columns=['Unnamed: 0'])

    # Create the ID column as the combination of `Plate` and `Well`.
    # Cast to str first: read_csv parses purely numeric plate names as
    # integers, and int + str would raise a TypeError.
    data['ID'] = data['Plate'].astype(str) + '-' + data['Well'].astype(str)
    data = data[['ID'] + [col for col in data.columns if col != 'ID']]  # ID first

    # Keep only rows that hold a real sequence. Markers are matched literally
    # (regex=False) so characters such as '.' are not treated as wildcards.
    candidates = data['aa_sequence'].dropna().astype(str)
    is_placeholder = pd.Series(False, index=candidates.index)
    for marker in _NON_SEQUENCE_MARKERS:
        is_placeholder |= candidates.str.contains(marker, regex=False)

    # Truncate at the first stop codon; a sequence that starts with a stop
    # codon becomes empty and cannot be embedded, so drop it.
    valid_sequences = candidates[~is_placeholder].apply(preprocess_sequence)
    valid_sequences = valid_sequences[valid_sequences != ""]

    coordinate_columns = ['x_coordinate', 'y_coordinate']
    if len(valid_sequences) >= 2:
        sequence_embeddings = _embed_sequences(valid_sequences.tolist(), batch_size)

        # Dimensionality reduction using PCA
        pca = PCA(n_components=2)
        xy_coordinates = pca.fit_transform(sequence_embeddings)
        xy_df = pd.DataFrame(
            xy_coordinates, columns=coordinate_columns, index=valid_sequences.index
        )
    else:
        # A 2-component PCA needs at least two samples; emit empty (NaN)
        # coordinate columns instead of crashing.
        print("Fewer than two valid sequences found; coordinates left empty.")
        xy_df = pd.DataFrame(
            columns=coordinate_columns, index=valid_sequences.index, dtype=float
        )

    # Add x, y coordinates back to the dataframe (aligned on the row index)
    data = pd.concat([data, xy_df], axis=1)

    # Determine output file location
    if output_file is None:
        input_name, input_ext = os.path.splitext(input_file)
        output_file = f"{input_name}_xy{input_ext}"

    # Save the updated dataframe to a file
    data.to_csv(output_file, index=False)
    print(f"Processed data with x, y coordinates saved to: {output_file}")
|
|
68
|
+
|
|
69
|
+
if __name__ == "__main__":
    # Command-line entry point: one required input CSV path and an optional
    # output path, both forwarded unchanged to process_file().
    cli = argparse.ArgumentParser(
        description="Generate x, y coordinates for amino acid sequences"
    )
    cli.add_argument('input_file', type=str, help="Path to the input CSV file")
    cli.add_argument(
        '--output_file',
        type=str,
        default=None,
        help="Path to save the output CSV file (optional)",
    )
    options = cli.parse_args()

    process_file(options.input_file, options.output_file)
|
|
76
|
+
|
|
@@ -275,11 +275,11 @@ def create_df_v(variants_df):
|
|
|
275
275
|
)
|
|
276
276
|
# Fill in 'Deletion' in 'aa_variant' column
|
|
277
277
|
df_variants_.loc[
|
|
278
|
-
df_variants_["nc_variant"] == "
|
|
279
|
-
] = "
|
|
278
|
+
df_variants_["nc_variant"] == "#DEL#", "aa_variant"
|
|
279
|
+
] = "#DEL#"
|
|
280
280
|
df_variants_.loc[
|
|
281
|
-
df_variants_["nc_variant"] == "
|
|
282
|
-
] = "
|
|
281
|
+
df_variants_["nc_variant"] == "#INS#", "aa_variant"
|
|
282
|
+
] = "#INS#"
|
|
283
283
|
|
|
284
284
|
# Compare aa_variant with translated refseq and generate Substitutions column
|
|
285
285
|
df_variants_["Substitutions"] = df_variants_.apply(get_mutations, axis=1)
|
|
@@ -291,7 +291,7 @@ def create_df_v(variants_df):
|
|
|
291
291
|
# Fill in Deletion into Substitutions Column, keep #N.A.# unchanged
|
|
292
292
|
for i in df_variants_.index:
|
|
293
293
|
if df_variants_["nc_variant"].iloc[i] == "Deletion":
|
|
294
|
-
df_variants_.Substitutions.iat[i] = df_variants_.Substitutions.iat[i].replace("", "
|
|
294
|
+
df_variants_.Substitutions.iat[i] = df_variants_.Substitutions.iat[i].replace("", "#DEL#")
|
|
295
295
|
elif df_variants_["nc_variant"].iloc[i] == "#N.A.#":
|
|
296
296
|
df_variants_.Substitutions.iat[i] = "#N.A.#"
|
|
297
297
|
|
|
@@ -321,30 +321,36 @@ def create_df_v(variants_df):
|
|
|
321
321
|
df_variants_["Plate"] = df_variants_["Plate"].apply(
|
|
322
322
|
lambda x: f"0{x}" if len(x) == 1 else x
|
|
323
323
|
)
|
|
324
|
-
|
|
324
|
+
|
|
325
|
+
# First rename columns as before
|
|
325
326
|
df_variants_.rename(columns={
|
|
326
327
|
"Variant": "nucleotide_mutation",
|
|
327
|
-
"Substitutions": "
|
|
328
|
+
"Substitutions": "amino_acid_substitutions",
|
|
328
329
|
"nc_variant": "nt_sequence",
|
|
329
330
|
"aa_variant": "aa_sequence"
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
restructured_df = df_variants_
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
331
|
+
}, inplace=True)
|
|
332
|
+
|
|
333
|
+
# Create a copy for restructuring to avoid affecting the original
|
|
334
|
+
restructured_df = df_variants_.copy()
|
|
335
|
+
restructured_df.columns = restructured_df.columns.str.lower().str.replace('[\s-]', '_', regex=True)
|
|
336
|
+
# Fix the specific column name
|
|
337
|
+
restructured_df.columns = restructured_df.columns.str.replace('p_adj._value', 'p_adj_value')
|
|
338
|
+
|
|
339
|
+
# Select the desired columns in the desired order
|
|
340
|
+
restructured_df = restructured_df[[
|
|
341
|
+
"barcode_plate",
|
|
342
|
+
"plate",
|
|
343
|
+
"well",
|
|
344
|
+
"alignment_count",
|
|
345
|
+
"nucleotide_mutation",
|
|
346
|
+
"amino_acid_substitutions",
|
|
347
|
+
"alignment_probability",
|
|
348
|
+
"average_mutation_frequency",
|
|
349
|
+
"p_value",
|
|
350
|
+
"p_adj_value",
|
|
351
|
+
"nt_sequence",
|
|
352
|
+
"aa_sequence"
|
|
353
|
+
]]
|
|
348
354
|
|
|
349
355
|
return restructured_df, df_variants_
|
|
350
356
|
|
|
@@ -357,9 +363,9 @@ def create_nc_variant(variant, refseq):
|
|
|
357
363
|
elif variant == "#PARENT#":
|
|
358
364
|
return refseq
|
|
359
365
|
elif "DEL" in variant:
|
|
360
|
-
return "
|
|
366
|
+
return "#DEL#"
|
|
361
367
|
elif variant == '+':
|
|
362
|
-
return "
|
|
368
|
+
return "#INS#"
|
|
363
369
|
else:
|
|
364
370
|
mutations = variant.split("_")
|
|
365
371
|
nc_variant = list(refseq)
|
|
@@ -459,7 +465,7 @@ def process_ref_csv(cl_args, tqdm_fn=tqdm.tqdm):
|
|
|
459
465
|
logging.info(f"Fasta file for {name} already exists. Skipping write.")
|
|
460
466
|
|
|
461
467
|
barcode_path = filter_bc(cl_args, name_folder, i)
|
|
462
|
-
output_dir = Path(result_folder) / "
|
|
468
|
+
output_dir = Path(result_folder) / f"{cl_args['name']}_fastq"
|
|
463
469
|
output_dir.mkdir(parents=True, exist_ok=True)
|
|
464
470
|
|
|
465
471
|
if not cl_args["skip_demultiplexing"]:
|
|
@@ -485,17 +491,25 @@ def process_ref_csv(cl_args, tqdm_fn=tqdm.tqdm):
|
|
|
485
491
|
continue
|
|
486
492
|
|
|
487
493
|
variant_df.to_csv(variant_csv_path, index=False)
|
|
488
|
-
return variant_df
|
|
494
|
+
return variant_df, ref_df
|
|
489
495
|
|
|
490
496
|
# Main function to run LevSeq and ensure saving of intermediate results if an error occurs
|
|
491
497
|
def run_LevSeq(cl_args, tqdm_fn=tqdm.tqdm):
|
|
492
498
|
result_folder = create_result_folder(cl_args)
|
|
499
|
+
# Ref folder for saving ref csv file
|
|
500
|
+
ref_folder = os.path.join(result_folder, "ref")
|
|
501
|
+
os.makedirs(ref_folder, exist_ok=True)
|
|
502
|
+
|
|
493
503
|
configure_logging(result_folder)
|
|
504
|
+
logging.info("Logging configured. Starting program.")
|
|
494
505
|
|
|
495
506
|
variant_df = pd.DataFrame(columns=["barcode_plate", "name", "refseq", "variant"])
|
|
496
507
|
|
|
497
508
|
try:
|
|
498
|
-
variant_df = process_ref_csv(cl_args, tqdm_fn)
|
|
509
|
+
variant_df, ref_df = process_ref_csv(cl_args, tqdm_fn)
|
|
510
|
+
ref_df_path = os.path.join(ref_folder, cl_args["name"]+".csv")
|
|
511
|
+
ref_df.to_csv(ref_df_path, index=False)
|
|
512
|
+
|
|
499
513
|
if variant_df.empty:
|
|
500
514
|
logging.warning("No data found during CSV processing. The CSV is empty.")
|
|
501
515
|
except Exception as e:
|