levseq 1.1.0__tar.gz → 1.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {levseq-1.1.0/levseq.egg-info → levseq-1.2.1}/PKG-INFO +2 -2
- {levseq-1.1.0 → levseq-1.2.1}/README.md +1 -1
- {levseq-1.1.0 → levseq-1.2.1}/levseq/__init__.py +1 -1
- {levseq-1.1.0 → levseq-1.2.1}/levseq/run_levseq.py +41 -30
- {levseq-1.1.0 → levseq-1.2.1}/levseq/seqfit.py +32 -28
- {levseq-1.1.0 → levseq-1.2.1}/levseq/visualization.py +4 -4
- {levseq-1.1.0 → levseq-1.2.1/levseq.egg-info}/PKG-INFO +2 -2
- {levseq-1.1.0 → levseq-1.2.1}/LICENSE +0 -0
- {levseq-1.1.0 → levseq-1.2.1}/MANIFEST.in +0 -0
- {levseq-1.1.0 → levseq-1.2.1}/levseq/IO_processor.py +0 -0
- {levseq-1.1.0 → levseq-1.2.1}/levseq/barcoding/__init__.py +0 -0
- {levseq-1.1.0 → levseq-1.2.1}/levseq/barcoding/demultiplex +0 -0
- {levseq-1.1.0 → levseq-1.2.1}/levseq/barcoding/demultiplex-arm64 +0 -0
- {levseq-1.1.0 → levseq-1.2.1}/levseq/barcoding/demultiplex-x86 +0 -0
- {levseq-1.1.0 → levseq-1.2.1}/levseq/barcoding/minion_barcodes.fasta +0 -0
- {levseq-1.1.0 → levseq-1.2.1}/levseq/basecaller.py +0 -0
- {levseq-1.1.0 → levseq-1.2.1}/levseq/cmd.py +0 -0
- {levseq-1.1.0 → levseq-1.2.1}/levseq/globals.py +0 -0
- {levseq-1.1.0 → levseq-1.2.1}/levseq/interface.py +0 -0
- {levseq-1.1.0 → levseq-1.2.1}/levseq/parser.py +0 -0
- {levseq-1.1.0 → levseq-1.2.1}/levseq/screen.py +0 -0
- {levseq-1.1.0 → levseq-1.2.1}/levseq/simulation.py +0 -0
- {levseq-1.1.0 → levseq-1.2.1}/levseq/user.py +0 -0
- {levseq-1.1.0 → levseq-1.2.1}/levseq/utils.py +0 -0
- {levseq-1.1.0 → levseq-1.2.1}/levseq/variantcaller.py +0 -0
- {levseq-1.1.0 → levseq-1.2.1}/levseq.egg-info/SOURCES.txt +0 -0
- {levseq-1.1.0 → levseq-1.2.1}/levseq.egg-info/dependency_links.txt +0 -0
- {levseq-1.1.0 → levseq-1.2.1}/levseq.egg-info/entry_points.txt +0 -0
- {levseq-1.1.0 → levseq-1.2.1}/levseq.egg-info/requires.txt +0 -0
- {levseq-1.1.0 → levseq-1.2.1}/levseq.egg-info/top_level.txt +0 -0
- {levseq-1.1.0 → levseq-1.2.1}/setup.cfg +0 -0
- {levseq-1.1.0 → levseq-1.2.1}/setup.py +0 -0
- {levseq-1.1.0 → levseq-1.2.1}/tests/test_demultiplex_docker.py +0 -0
- {levseq-1.1.0 → levseq-1.2.1}/tests/test_opligopools.py +0 -0
- {levseq-1.1.0 → levseq-1.2.1}/tests/test_seqfitvis.py +0 -0
- {levseq-1.1.0 → levseq-1.2.1}/tests/test_seqs.py +0 -0
- {levseq-1.1.0 → levseq-1.2.1}/tests/test_statistics.py +0 -0
- {levseq-1.1.0 → levseq-1.2.1}/tests/test_variant_calling.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: levseq
|
|
3
|
-
Version: 1.1.0
|
|
3
|
+
Version: 1.2.1
|
|
4
4
|
Home-page: https://github.com/fhalab/levseq/
|
|
5
5
|
Author: Yueming Long, Emreay Gursoy, Ariane Mora, Francesca-Zhoufan Li
|
|
6
6
|
Author-email: ylong@caltech.edu
|
|
@@ -48,7 +48,7 @@ Requires-Dist: tqdm
|
|
|
48
48
|
|
|
49
49
|
In directed evolution, sequencing every variant enhances data insight and creates datasets suitable for AI/ML methods. This method is presented as an extension of the original Every Variant Sequencer using Illumina technology. With this approach, sequence variants can be generated within a day at an extremely low cost.
|
|
50
50
|
|
|
51
|
-

|
|
52
52
|
Figure 1: Overview of the LevSeq variant sequencing workflow using Nanopore technology. This diagram illustrates the key steps in the process, from sample preparation to data analysis and visualization.
|
|
53
53
|
|
|
54
54
|
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
In directed evolution, sequencing every variant enhances data insight and creates datasets suitable for AI/ML methods. This method is presented as an extension of the original Every Variant Sequencer using Illumina technology. With this approach, sequence variants can be generated within a day at an extremely low cost.
|
|
4
4
|
|
|
5
|
-

|
|
6
6
|
Figure 1: Overview of the LevSeq variant sequencing workflow using Nanopore technology. This diagram illustrates the key steps in the process, from sample preparation to data analysis and visualization.
|
|
7
7
|
|
|
8
8
|
|
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
__title__ = 'levseq'
|
|
19
19
|
__description__ = 'LevSeq nanopore sequencing'
|
|
20
20
|
__url__ = 'https://github.com/fhalab/levseq/'
|
|
21
|
-
__version__ = '1.1.0'
|
|
21
|
+
__version__ = '1.2.1'
|
|
22
22
|
__author__ = 'Yueming Long, Emreay Gursoy, Ariane Mora, Francesca-Zhoufan Li'
|
|
23
23
|
__author_email__ = 'ylong@caltech.edu'
|
|
24
24
|
__license__ = 'GPL3'
|
|
@@ -221,7 +221,7 @@ def demux_fastq(file_to_fastq, result_folder, barcode_path):
|
|
|
221
221
|
executable_path = package_root / "levseq" / "barcoding" / executable_name
|
|
222
222
|
if not executable_path.exists():
|
|
223
223
|
raise FileNotFoundError(f"Executable not found: {executable_path}")
|
|
224
|
-
seq_min =
|
|
224
|
+
seq_min = 200
|
|
225
225
|
seq_max = 10000
|
|
226
226
|
prompt = f"{executable_path} -f {file_to_fastq} -d {result_folder} -b {barcode_path} -w 100 -r 100 -m {seq_min} -x {seq_max}"
|
|
227
227
|
subprocess.run(prompt, shell=True, check=True)
|
|
@@ -258,7 +258,7 @@ def create_df_v(variants_df):
|
|
|
258
258
|
|
|
259
259
|
# Translate nc_variant to aa_variant
|
|
260
260
|
df_variants_["aa_variant"] = df_variants_["nc_variant"].apply(
|
|
261
|
-
|
|
261
|
+
lambda x: x if x in ["Deletion", "#N.A.#"] else translate(x)
|
|
262
262
|
)
|
|
263
263
|
# Fill in 'Deletion' in 'aa_variant' column
|
|
264
264
|
df_variants_.loc[
|
|
@@ -284,10 +284,13 @@ def create_df_v(variants_df):
|
|
|
284
284
|
df_variants_["Alignment Probability"] = df_variants_["Alignment Probability"].fillna(0.0)
|
|
285
285
|
df_variants_["Alignment Count"] = df_variants_["Alignment Count"].fillna(0.0)
|
|
286
286
|
|
|
287
|
-
# Fill in Deletion into Substitutions Column
|
|
287
|
+
# Fill in Deletion into Substitutions Column, keep #N.A.# unchanged
|
|
288
288
|
for i in df_variants_.index:
|
|
289
289
|
if df_variants_["nc_variant"].iloc[i] == "Deletion":
|
|
290
290
|
df_variants_.Substitutions.iat[i] = df_variants_.Substitutions.iat[i].replace("", "-")
|
|
291
|
+
elif df_variants_["nc_variant"].iloc[i] == "#N.A.#":
|
|
292
|
+
df_variants_.Substitutions.iat[i] = "#N.A.#"
|
|
293
|
+
|
|
291
294
|
|
|
292
295
|
# Add row and columns
|
|
293
296
|
Well = df_variants_["Well"].tolist()
|
|
@@ -312,40 +315,41 @@ def create_df_v(variants_df):
|
|
|
312
315
|
df_variants_["Plate"] = df_variants_["Plate"].apply(
|
|
313
316
|
lambda x: f"0{x}" if len(x) == 1 else x
|
|
314
317
|
)
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
318
|
+
# Rename columns as per the request
|
|
319
|
+
df_variants_.rename(columns={
|
|
320
|
+
"Variant": "nucleotide_mutation",
|
|
321
|
+
"Substitutions": "amino-acid_substitutions",
|
|
322
|
+
"nc_variant": "nt_sequence",
|
|
323
|
+
"aa_variant": "aa_sequence"
|
|
321
324
|
}, inplace=True)
|
|
322
325
|
|
|
326
|
+
|
|
323
327
|
# Select the desired columns in the desired order
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
328
|
+
restructured_df = df_variants_[
|
|
329
|
+
[
|
|
330
|
+
"barcode_plate",
|
|
331
|
+
"Plate",
|
|
332
|
+
"Well",
|
|
333
|
+
"Alignment Count",
|
|
334
|
+
"nucleotide_mutation",
|
|
335
|
+
"amino-acid_substitutions",
|
|
336
|
+
"Alignment Probability",
|
|
337
|
+
"Average mutation frequency",
|
|
338
|
+
"P value",
|
|
339
|
+
"P adj. value",
|
|
340
|
+
"nt_sequence",
|
|
341
|
+
"aa_sequence",
|
|
342
|
+
]
|
|
343
|
+
]
|
|
344
|
+
|
|
345
|
+
return restructured_df, df_variants_
|
|
342
346
|
|
|
343
347
|
# Helper functions for create_df_v
|
|
344
348
|
def create_nc_variant(variant, refseq):
|
|
345
349
|
if isinstance(variant, np.ndarray):
|
|
346
350
|
variant = variant.tolist()
|
|
347
351
|
if variant == "" or pd.isnull(variant):
|
|
348
|
-
return
|
|
352
|
+
return "#N.A.#" # Return #N.A.# if variant is empty or null
|
|
349
353
|
elif variant == "#PARENT#":
|
|
350
354
|
return refseq
|
|
351
355
|
elif "DEL" in variant:
|
|
@@ -362,19 +366,25 @@ def create_nc_variant(variant, refseq):
|
|
|
362
366
|
nc_variant[position] = new
|
|
363
367
|
return "".join(nc_variant)
|
|
364
368
|
|
|
369
|
+
|
|
365
370
|
def is_valid_dna_sequence(sequence):
|
|
366
371
|
return all(nucleotide in 'ATGC' for nucleotide in sequence) and len(sequence) % 3 == 0
|
|
367
372
|
|
|
368
373
|
def get_mutations(row):
|
|
369
374
|
try:
|
|
370
|
-
|
|
375
|
+
alignment_count = row["Alignment Count"]
|
|
371
376
|
|
|
377
|
+
# Check if alignment_count is zero and return "#N.A.#" if true
|
|
378
|
+
if alignment_count == 0:
|
|
379
|
+
return "#N.A.#"
|
|
380
|
+
|
|
381
|
+
refseq = row["refseq"]
|
|
382
|
+
|
|
372
383
|
if not is_valid_dna_sequence(refseq):
|
|
373
384
|
return "Invalid refseq provided, check template sequence. Only A, T, G, C and sequence dividable by 3 are accepted."
|
|
374
385
|
|
|
375
386
|
refseq_aa = translate(refseq)
|
|
376
387
|
variant_aa = row["aa_variant"]
|
|
377
|
-
alignment_count = row["Alignment Count"]
|
|
378
388
|
|
|
379
389
|
if variant_aa == "Deletion":
|
|
380
390
|
return ""
|
|
@@ -400,6 +410,7 @@ def get_mutations(row):
|
|
|
400
410
|
)
|
|
401
411
|
raise
|
|
402
412
|
|
|
413
|
+
|
|
403
414
|
# Save plate maps and CSV
|
|
404
415
|
def save_platemap_to_file(heatmaps, outputdir, name, show_msa):
|
|
405
416
|
if not os.path.exists(os.path.join(outputdir, "Platemaps")):
|
|
@@ -124,7 +124,7 @@ def normalise_calculate_stats(processed_plate_df, value_columns, normalise='stan
|
|
|
124
124
|
for plate in set(processed_plate_df['Plate'].values):
|
|
125
125
|
for value_column in value_columns:
|
|
126
126
|
sub_df = processed_plate_df[processed_plate_df['Plate'] == plate]
|
|
127
|
-
parent_values = sub_df[sub_df['
|
|
127
|
+
parent_values = sub_df[sub_df['amino-acid_substitutions'] == parent][value_column].values
|
|
128
128
|
parent_mean = np.mean(parent_values)
|
|
129
129
|
parent_sd = np.std(parent_values)
|
|
130
130
|
|
|
@@ -140,14 +140,14 @@ def normalise_calculate_stats(processed_plate_df, value_columns, normalise='stan
|
|
|
140
140
|
|
|
141
141
|
sd_cutoff = 1.5 # The number of standard deviations we want above the parent values
|
|
142
142
|
# Now for all the other mutations we want to look if they are significant, first we'll look at combinations and then individually
|
|
143
|
-
grouped_by_mutations = processed_plate_df.groupby('
|
|
143
|
+
grouped_by_mutations = processed_plate_df.groupby('amino-acid_substitutions')
|
|
144
144
|
|
|
145
145
|
rows = []
|
|
146
146
|
for mutation, grp in tqdm(grouped_by_mutations):
|
|
147
147
|
# Get the values and then do a ranksum test
|
|
148
148
|
if mutation != parent:
|
|
149
149
|
for value_column in normalised_value_columns:
|
|
150
|
-
parent_values = list(processed_plate_df[processed_plate_df['
|
|
150
|
+
parent_values = list(processed_plate_df[processed_plate_df['amino-acid_substitutions'] == parent][value_column].values)
|
|
151
151
|
parent_mean = np.mean(parent_values)
|
|
152
152
|
parent_sd = np.std(parent_values)
|
|
153
153
|
|
|
@@ -164,7 +164,7 @@ def normalise_calculate_stats(processed_plate_df, value_columns, normalise='stan
|
|
|
164
164
|
rows.append(
|
|
165
165
|
[value_column, mutation, len(grp), mean_vals, std_vals, median_vals, mean_vals - parent_mean, sig,
|
|
166
166
|
U1, p])
|
|
167
|
-
stats_df = pd.DataFrame(rows, columns=['value_column', '
|
|
167
|
+
stats_df = pd.DataFrame(rows, columns=['value_column', 'amino-acid_substitutions', 'number of wells with amino-acid substitutions', 'mean', 'std',
|
|
168
168
|
'median', 'amount greater than parent mean',
|
|
169
169
|
f'greater than > {sd_cutoff} parent', 'man whitney U stat', 'p-value'])
|
|
170
170
|
return stats_df
|
|
@@ -275,10 +275,13 @@ def work_up_lcms(
|
|
|
275
275
|
return series
|
|
276
276
|
|
|
277
277
|
df["Sample Vial Number"] = fill_vial_number(df["Sample Vial Number"].copy())
|
|
278
|
+
# Drop empty ones!
|
|
279
|
+
df = df[df["Sample Vial Number"] != 0]
|
|
278
280
|
# Remove unwanted wells
|
|
279
281
|
df = df[df["Sample Name"] != drop_string]
|
|
280
282
|
# Get wells
|
|
281
|
-
|
|
283
|
+
|
|
284
|
+
df.insert(0, "Well", df["Sample Vial Number"].apply(lambda x: str(x).split("-")[-1]))
|
|
282
285
|
# Rename
|
|
283
286
|
df = df.rename({"Sample Name": "Plate"}, axis="columns")
|
|
284
287
|
# Create minimal DataFrame
|
|
@@ -290,7 +293,7 @@ def work_up_lcms(
|
|
|
290
293
|
index=["Well", "Plate"], columns="Compound Name", values="Area", aggfunc="max"
|
|
291
294
|
).reset_index()
|
|
292
295
|
# Get rows and columns
|
|
293
|
-
df.insert(1, "Column", df["Well"].apply(lambda x: int(x[1:])))
|
|
296
|
+
df.insert(1, "Column", df["Well"].apply(lambda x: int(x[1:]) if x[1:].isdigit() else None))
|
|
294
297
|
df.insert(1, "Row", df["Well"].apply(lambda x: x[0]))
|
|
295
298
|
# Set values as floats
|
|
296
299
|
cols = products + substrates if substrates is not None else products
|
|
@@ -330,9 +333,10 @@ def process_plate_files(product: str, input_csv: str) -> pd.DataFrame:
|
|
|
330
333
|
# Load the provided CSV file
|
|
331
334
|
results_df = pd.read_csv(input_csv)
|
|
332
335
|
|
|
333
|
-
# Extract the required columns: Plate, Well,
|
|
334
|
-
|
|
335
|
-
filtered_df =
|
|
336
|
+
# Extract the required columns: Plate, Well, amino-acid_substitutionss, and nt_sequence, and remove rows with '#N.A.#' and NaN values
|
|
337
|
+
# barcode_plate Plate Well Alignment Count nucleotide_amino-acid_substitutions amino-acid_substitutions Alignment Probability Average amino-acid_substitutions frequency P value P adj. value nt_sequence aa_sequence
|
|
338
|
+
filtered_df = results_df[["Plate", "Well", "amino-acid_substitutions", "nt_sequence", "aa_sequence"]]
|
|
339
|
+
filtered_df = filtered_df[(filtered_df["amino-acid_substitutions"] != "#N.A.#")].dropna()
|
|
336
340
|
|
|
337
341
|
# Extract the unique entries of Plate
|
|
338
342
|
unique_plates = filtered_df["Plate"].unique()
|
|
@@ -357,14 +361,14 @@ def process_plate_files(product: str, input_csv: str) -> pd.DataFrame:
|
|
|
357
361
|
plate_df = plate_object.df
|
|
358
362
|
plate_df["Plate"] = plate # Add the plate identifier for reference
|
|
359
363
|
|
|
360
|
-
# Merge filtered_df with plate_df to retain
|
|
364
|
+
# Merge filtered_df with plate_df to retain amino-acid_substitutionss and nt_sequence columns
|
|
361
365
|
merged_df = pd.merge(
|
|
362
366
|
plate_df, filtered_df, on=["Plate", "Well"], how="left"
|
|
363
367
|
)
|
|
364
368
|
columns_order = (
|
|
365
|
-
["Plate", "Well", "Row", "Column", "
|
|
369
|
+
["Plate", "Well", "Row", "Column", "amino-acid_substitutions"]
|
|
366
370
|
+ product
|
|
367
|
-
+ ["
|
|
371
|
+
+ ["nt_sequence", "aa_sequence"]
|
|
368
372
|
)
|
|
369
373
|
merged_df = merged_df[columns_order]
|
|
370
374
|
processed_data.append(merged_df)
|
|
@@ -374,13 +378,13 @@ def process_plate_files(product: str, input_csv: str) -> pd.DataFrame:
|
|
|
374
378
|
processed_df = pd.concat(processed_data, ignore_index=True)
|
|
375
379
|
else:
|
|
376
380
|
processed_df = pd.DataFrame(
|
|
377
|
-
columns=["Plate", "Well", "Row", "Column", "
|
|
381
|
+
columns=["Plate", "Well", "Row", "Column", "amino-acid_substitutions"]
|
|
378
382
|
+ product
|
|
379
|
-
+ ["
|
|
383
|
+
+ ["nt_sequence", "aa_sequence"]
|
|
380
384
|
)
|
|
381
385
|
|
|
382
386
|
# Ensure all entries in 'Mutations' are treated as strings
|
|
383
|
-
processed_df["
|
|
387
|
+
processed_df["amino-acid_substitutions"] = processed_df["amino-acid_substitutions"].astype(str)
|
|
384
388
|
|
|
385
389
|
# Remove any rows with empty values
|
|
386
390
|
processed_df = processed_df.dropna()
|
|
@@ -420,19 +424,19 @@ def match_plate2parent(df: pd.DataFrame, parent_dict: Optional[Dict] = None) ->
|
|
|
420
424
|
|
|
421
425
|
if parent_dict is None:
|
|
422
426
|
|
|
423
|
-
# add
|
|
424
|
-
if "
|
|
425
|
-
df["
|
|
426
|
-
Bio.sequence.Sequence(df["
|
|
427
|
+
# add aa_sequence column if not present by translating from the nt_sequence column
|
|
428
|
+
if "aa_sequence" not in df.columns:
|
|
429
|
+
df["aa_sequence"] = df["nt_sequence"].apply(
|
|
430
|
+
Bio.sequence.Sequence(df["nt_sequence"]).translate
|
|
427
431
|
)
|
|
428
432
|
|
|
429
433
|
# get all the parents from the df
|
|
430
|
-
parents = df[df["
|
|
434
|
+
parents = df[df["amino-acid_substitutions"] == "#PARENT#"].reset_index(drop=True).copy()
|
|
431
435
|
|
|
432
|
-
# get the parent
|
|
436
|
+
# get the parent nt_sequence
|
|
433
437
|
parent_aas = (
|
|
434
|
-
df[df["
|
|
435
|
-
.drop_duplicates()["
|
|
438
|
+
df[df["amino-acid_substitutions"] == "#PARENT#"][["amino-acid_substitutions", "aa_sequence"]]
|
|
439
|
+
.drop_duplicates()["aa_sequence"]
|
|
436
440
|
.tolist()
|
|
437
441
|
)
|
|
438
442
|
|
|
@@ -440,7 +444,7 @@ def match_plate2parent(df: pd.DataFrame, parent_dict: Optional[Dict] = None) ->
|
|
|
440
444
|
|
|
441
445
|
# get the plate names for each parent
|
|
442
446
|
parent2plate = {
|
|
443
|
-
p_name: df[df["
|
|
447
|
+
p_name: df[df["aa_sequence"] == p_seq]["Plate"].unique().tolist()
|
|
444
448
|
for p_name, p_seq in parent_dict.items()
|
|
445
449
|
}
|
|
446
450
|
|
|
@@ -516,7 +520,7 @@ def norm2parent(plate_df: pd.DataFrame) -> pd.DataFrame:
|
|
|
516
520
|
|
|
517
521
|
# get all the parents from the df
|
|
518
522
|
parents = (
|
|
519
|
-
plate_df[plate_df["
|
|
523
|
+
plate_df[plate_df["amino-acid_substitutions"] == "#PARENT#"].reset_index(drop=True).copy()
|
|
520
524
|
)
|
|
521
525
|
filtered_parents = (
|
|
522
526
|
parents.drop(index=detect_outliers_iqr(parents["pdt"]))
|
|
@@ -610,7 +614,7 @@ def get_single_ssm_site_df(
|
|
|
610
614
|
# get parents from those plates
|
|
611
615
|
site_parent_df = (
|
|
612
616
|
single_ssm_df[
|
|
613
|
-
(single_ssm_df["
|
|
617
|
+
(single_ssm_df["amino-acid_substitutions"] == "#PARENT#")
|
|
614
618
|
& (single_ssm_df["Plate"].isin(site_df["Plate"].unique()))
|
|
615
619
|
]
|
|
616
620
|
.reset_index(drop=True)
|
|
@@ -1145,12 +1149,12 @@ def gen_seqfitvis(
|
|
|
1145
1149
|
|
|
1146
1150
|
df = pd.read_csv(seqfit_path)
|
|
1147
1151
|
# ignore deletion meaning "Mutations" == "-"
|
|
1148
|
-
df = df[df["
|
|
1152
|
+
df = df[df["amino-acid_substitutions"] != "-"].copy()
|
|
1149
1153
|
# count number of sites mutated and append mutation details
|
|
1150
1154
|
# df["num_sites"] = df['Mutations'].apply(lambda x: 0 if x == "#PARENT#" else len(x.split("_")))
|
|
1151
1155
|
|
|
1152
1156
|
# Apply function to the column
|
|
1153
|
-
df[["num_sites", "mut_dets"]] = df["
|
|
1157
|
+
df[["num_sites", "mut_dets"]] = df["amino-acid_substitutions"].apply(process_mutation)
|
|
1154
1158
|
|
|
1155
1159
|
# apply the norm function to all plates
|
|
1156
1160
|
df = df.groupby("Plate").apply(norm2parent).reset_index(drop=True).copy()
|
|
@@ -142,7 +142,7 @@ def _make_platemap(df, title, cmap=None):
|
|
|
142
142
|
'Row': list('ABCDEFGH'),
|
|
143
143
|
'Column': [str(i) for i in range(1, 13)],
|
|
144
144
|
'logseqdepth': [0] * 96,
|
|
145
|
-
'
|
|
145
|
+
'amino-acid_substitutions': [''] * 96,
|
|
146
146
|
'Alignment Count': [0] * 96,
|
|
147
147
|
'Alignment Probability': [0] * 96
|
|
148
148
|
})
|
|
@@ -195,7 +195,7 @@ def _make_platemap(df, title, cmap=None):
|
|
|
195
195
|
|
|
196
196
|
# add tooltips
|
|
197
197
|
tooltips = [
|
|
198
|
-
("
|
|
198
|
+
("amino-acid_substitutions", "@amino-acid_substitutions"),
|
|
199
199
|
("Alignment Count", "@Alignment Count"),
|
|
200
200
|
("Alignment Probability", "@Alignment Probability"),
|
|
201
201
|
]
|
|
@@ -211,7 +211,7 @@ def _make_platemap(df, title, cmap=None):
|
|
|
211
211
|
kdims=["Column", "Row"],
|
|
212
212
|
vdims=[
|
|
213
213
|
"logseqdepth",
|
|
214
|
-
"
|
|
214
|
+
"amino-acid_substitutions",
|
|
215
215
|
"Alignment Count",
|
|
216
216
|
"Alignment Probability",
|
|
217
217
|
],
|
|
@@ -300,7 +300,7 @@ def _make_platemap(df, title, cmap=None):
|
|
|
300
300
|
return new_line_mutations
|
|
301
301
|
|
|
302
302
|
_df = df.copy()
|
|
303
|
-
_df["Labels"] = _df["
|
|
303
|
+
_df["Labels"] = _df["amino-acid_substitutions"].apply(split_variant_labels)
|
|
304
304
|
|
|
305
305
|
# Set the font size based on if #PARENT# is in a well and num of mutations
|
|
306
306
|
max_num_mutations = _df["Labels"].apply(lambda x: len(x.split("\n"))).max()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: levseq
|
|
3
|
-
Version: 1.1.0
|
|
3
|
+
Version: 1.2.1
|
|
4
4
|
Home-page: https://github.com/fhalab/levseq/
|
|
5
5
|
Author: Yueming Long, Emreay Gursoy, Ariane Mora, Francesca-Zhoufan Li
|
|
6
6
|
Author-email: ylong@caltech.edu
|
|
@@ -48,7 +48,7 @@ Requires-Dist: tqdm
|
|
|
48
48
|
|
|
49
49
|
In directed evolution, sequencing every variant enhances data insight and creates datasets suitable for AI/ML methods. This method is presented as an extension of the original Every Variant Sequencer using Illumina technology. With this approach, sequence variants can be generated within a day at an extremely low cost.
|
|
50
50
|
|
|
51
|
-

|
|
52
52
|
Figure 1: Overview of the LevSeq variant sequencing workflow using Nanopore technology. This diagram illustrates the key steps in the process, from sample preparation to data analysis and visualization.
|
|
53
53
|
|
|
54
54
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|