mgnify-pipelines-toolkit 1.2.1__tar.gz → 1.2.3__tar.gz
This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mgnify-pipelines-toolkit might be problematic. Click here for more details.
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/PKG-INFO +1 -1
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit/analysis/amplicon/classify_var_regions.py +35 -23
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit/analysis/amplicon/make_asv_count_table.py +18 -17
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit/analysis/amplicon/primer_val_classification.py +37 -9
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit.egg-info/PKG-INFO +1 -1
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/pyproject.toml +1 -1
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/LICENSE +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/README.md +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit/__init__.py +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit/analysis/__init__.py +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit/analysis/amplicon/mapseq_to_asv_table.py +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit/analysis/amplicon/remove_ambiguous_reads.py +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit/analysis/amplicon/rev_comp_se_primers.py +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit/analysis/amplicon/study_summary_generator.py +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit/analysis/assembly/add_rhea_chebi_annotation.py +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit/analysis/assembly/antismash_gff_builder.py +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit/analysis/assembly/combined_gene_caller_merge.py +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit/analysis/assembly/generate_gaf.py +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit/analysis/assembly/gff_annotation_utils.py +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit/analysis/assembly/gff_file_utils.py +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit/analysis/assembly/gff_toolkit.py +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit/analysis/assembly/go_utils.py +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit/analysis/assembly/krona_txt_from_cat_classification.py +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit/analysis/assembly/process_dbcan_result_cazys.py +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit/analysis/assembly/process_dbcan_result_clusters.py +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit/analysis/assembly/study_summary_generator.py +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit/analysis/assembly/summarise_antismash_bgcs.py +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit/analysis/assembly/summarise_goslims.py +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit/analysis/assembly/summarise_sanntis_bgcs.py +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit/analysis/genomes/__init__.py +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit/analysis/shared/__init__.py +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit/analysis/shared/convert_cmscan_to_cmsearch_tblout.py +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit/analysis/shared/dwc_summary_generator.py +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit/analysis/shared/fastq_suffix_header_check.py +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit/analysis/shared/get_subunits.py +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit/analysis/shared/get_subunits_coords.py +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit/analysis/shared/library_strategy_check.py +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit/analysis/shared/mapseq2biom.py +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit/analysis/shared/markergene_study_summary.py +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit/constants/db_labels.py +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit/constants/ncrna.py +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit/constants/regex_fasta_header.py +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit/constants/tax_ranks.py +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit/constants/thresholds.py +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit/constants/var_region_coordinates.py +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit/schemas/schemas.py +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit/utils/__init__.py +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit/utils/fasta_to_delimited.py +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit/utils/get_mpt_version.py +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit.egg-info/SOURCES.txt +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit.egg-info/dependency_links.txt +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit.egg-info/entry_points.txt +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit.egg-info/requires.txt +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/mgnify_pipelines_toolkit.egg-info/top_level.txt +0 -0
- {mgnify_pipelines_toolkit-1.2.1 → mgnify_pipelines_toolkit-1.2.3}/setup.cfg +0 -0
|
@@ -22,12 +22,12 @@ import os
|
|
|
22
22
|
import logging
|
|
23
23
|
import json
|
|
24
24
|
import time
|
|
25
|
+
import numpy as np
|
|
25
26
|
|
|
26
27
|
from mgnify_pipelines_toolkit.constants.thresholds import (
|
|
27
28
|
MIN_OVERLAP,
|
|
28
29
|
MIN_SEQ_COUNT,
|
|
29
30
|
MAX_ERROR_PROPORTION,
|
|
30
|
-
MAX_INTERNAL_PRIMER_PROPORTION,
|
|
31
31
|
)
|
|
32
32
|
from mgnify_pipelines_toolkit.constants.var_region_coordinates import (
|
|
33
33
|
REGIONS_16S_BACTERIA,
|
|
@@ -62,8 +62,16 @@ def get_multiregion(raw_sequence_coords, regions):
|
|
|
62
62
|
|
|
63
63
|
Returns:
|
|
64
64
|
amplified_region: Amplified variable regions.
|
|
65
|
+
region_coverages: Coverage of all detected variable regions
|
|
65
66
|
|
|
66
67
|
"""
|
|
68
|
+
|
|
69
|
+
region_coverages = defaultdict(float)
|
|
70
|
+
|
|
71
|
+
for region, limits in regions.items():
|
|
72
|
+
overlap = calc_overlap(raw_sequence_coords, limits)
|
|
73
|
+
region_coverages[region] = overlap
|
|
74
|
+
|
|
67
75
|
# check if any of the coords are inside the region
|
|
68
76
|
matched_regions = [
|
|
69
77
|
region
|
|
@@ -76,7 +84,7 @@ def get_multiregion(raw_sequence_coords, regions):
|
|
|
76
84
|
amplified_region = matched_regions[0]
|
|
77
85
|
else:
|
|
78
86
|
amplified_region = ""
|
|
79
|
-
return amplified_region
|
|
87
|
+
return amplified_region, region_coverages
|
|
80
88
|
|
|
81
89
|
|
|
82
90
|
def check_primer_position(raw_sequence_coords, regions):
|
|
@@ -90,7 +98,7 @@ def check_primer_position(raw_sequence_coords, regions):
|
|
|
90
98
|
|
|
91
99
|
"""
|
|
92
100
|
result_flag = False
|
|
93
|
-
margin =
|
|
101
|
+
margin = 10 # allowed margin of error
|
|
94
102
|
for coord in raw_sequence_coords:
|
|
95
103
|
for region in regions.values():
|
|
96
104
|
if coord in range(region[0] + margin, region[1] - margin):
|
|
@@ -342,22 +350,30 @@ def retrieve_regions(
|
|
|
342
350
|
per_read_info = (
|
|
343
351
|
dict()
|
|
344
352
|
) # dictionary will contain read names for each variable region
|
|
353
|
+
all_region_coverages = defaultdict(lambda: defaultdict(list))
|
|
345
354
|
for read in data:
|
|
355
|
+
# Example structure of `read`
|
|
356
|
+
# ('ERR14650515.1', 'SSU_rRNA_archaea', 'RF01959', 'hmm', '3', '525', '1', '518', '+', '-', '6', '0.55', '0.6', '363.6', '7.8e-107')
|
|
346
357
|
regions = determine_cm(read[2])
|
|
347
358
|
sequence_counter_total += 1
|
|
348
359
|
limits = list(map(int, read[4:6]))
|
|
349
360
|
domain = determine_domain(read[2])
|
|
350
361
|
marker_gene = determine_marker_gene(domain)
|
|
351
362
|
if not regions == "unsupported":
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
363
|
+
matches, coverages = get_multiregion(limits, regions)
|
|
364
|
+
|
|
365
|
+
[
|
|
366
|
+
all_region_coverages[domain][region].append(coverage)
|
|
367
|
+
for region, coverage in coverages.items()
|
|
368
|
+
]
|
|
369
|
+
|
|
370
|
+
multiregion_matches.setdefault(read[2], []).append(matches)
|
|
355
371
|
if check_primer_position(limits, regions):
|
|
356
372
|
primer_inside_vr += 1
|
|
357
373
|
sequence_counter_useful += 1
|
|
358
|
-
per_read_info.setdefault(
|
|
359
|
-
|
|
360
|
-
)
|
|
374
|
+
per_read_info.setdefault(marker_gene + "." + matches, []).append(
|
|
375
|
+
read[0]
|
|
376
|
+
)
|
|
361
377
|
else:
|
|
362
378
|
unsupported_matches += 1
|
|
363
379
|
|
|
@@ -385,18 +401,6 @@ def retrieve_regions(
|
|
|
385
401
|
)
|
|
386
402
|
continue
|
|
387
403
|
|
|
388
|
-
# filter out runs with too many sequences starting/ending inside variable regions
|
|
389
|
-
internal_seq_fract = primer_inside_vr / len(data)
|
|
390
|
-
if internal_seq_fract > MAX_INTERNAL_PRIMER_PROPORTION:
|
|
391
|
-
failed_run_counter += 1
|
|
392
|
-
logging.info("No output will be produced - too many internal mappings")
|
|
393
|
-
logging.info(
|
|
394
|
-
"Excluded due to high proportion of internal primers:\t{}\t{}\n".format(
|
|
395
|
-
tblout_file, "{0:.2f}".format(internal_seq_fract)
|
|
396
|
-
)
|
|
397
|
-
)
|
|
398
|
-
continue
|
|
399
|
-
|
|
400
404
|
normalised_matches[run_id] = dict()
|
|
401
405
|
region_counter = defaultdict(int)
|
|
402
406
|
|
|
@@ -432,14 +436,12 @@ def retrieve_regions(
|
|
|
432
436
|
multiregion_matches[model] = new_value
|
|
433
437
|
|
|
434
438
|
[multiregion_matches.pop(model) for model in models_to_remove]
|
|
435
|
-
print(multiregion_matches)
|
|
436
439
|
|
|
437
440
|
run_status = "one"
|
|
438
441
|
run_result = dict()
|
|
439
442
|
total_useful_sequences = 0.0
|
|
440
443
|
temp_seq_counter = dict()
|
|
441
444
|
for model, model_regions in multiregion_matches.items():
|
|
442
|
-
print(model)
|
|
443
445
|
result = normalise_results(model_regions)
|
|
444
446
|
if result is None:
|
|
445
447
|
run_status = "ambiguous"
|
|
@@ -469,6 +471,16 @@ def retrieve_regions(
|
|
|
469
471
|
logging.info("No output will be produced - the run is ambiguous.")
|
|
470
472
|
continue
|
|
471
473
|
|
|
474
|
+
coverage_fw = open(f"{outfile_prefix}_all_coverages.txt", "w")
|
|
475
|
+
|
|
476
|
+
for domain, regions in all_region_coverages.items():
|
|
477
|
+
for region in regions:
|
|
478
|
+
if len(regions[region]) < MIN_SEQ_COUNT:
|
|
479
|
+
continue
|
|
480
|
+
region_coverage = float(np.mean(regions[region]))
|
|
481
|
+
if region_coverage > 0:
|
|
482
|
+
coverage_fw.write(f"{domain}:{region}: {region_coverage}\n")
|
|
483
|
+
|
|
472
484
|
json_outfile = "{}.json".format(outfile_prefix)
|
|
473
485
|
tsv_outfile = "{}.tsv".format(outfile_prefix)
|
|
474
486
|
with open(json_outfile, "w") as f:
|
|
@@ -300,23 +300,24 @@ def main():
|
|
|
300
300
|
if paired_end:
|
|
301
301
|
rev_fr.close()
|
|
302
302
|
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
303
|
+
if asv_dict: # if there are matches between taxonomic and ASV annotations
|
|
304
|
+
ref_db = ""
|
|
305
|
+
|
|
306
|
+
if len(taxa_df.columns) == 9:
|
|
307
|
+
tax_assignment_dict = make_tax_assignment_dict_silva(taxa_df, asv_dict)
|
|
308
|
+
ref_db = "silva"
|
|
309
|
+
elif len(taxa_df.columns) == 10:
|
|
310
|
+
tax_assignment_dict = make_tax_assignment_dict_pr2(taxa_df, asv_dict)
|
|
311
|
+
ref_db = "pr2"
|
|
312
|
+
|
|
313
|
+
with open(f"./{sample}_{amp_region}_{ref_db}_asv_krona_counts.txt", "w") as fw:
|
|
314
|
+
for tax_assignment, count in tax_assignment_dict.items():
|
|
315
|
+
fw.write(f"{count}\t{tax_assignment}\n")
|
|
316
|
+
|
|
317
|
+
asv_count_df = generate_asv_count_dict(asv_dict)
|
|
318
|
+
asv_count_df.to_csv(
|
|
319
|
+
f"./{sample}_{amp_region}_asv_read_counts.tsv", sep="\t", index=False
|
|
320
|
+
)
|
|
320
321
|
|
|
321
322
|
|
|
322
323
|
if __name__ == "__main__":
|
|
@@ -19,6 +19,7 @@ from collections import defaultdict
|
|
|
19
19
|
import re
|
|
20
20
|
|
|
21
21
|
from Bio import SeqIO
|
|
22
|
+
from Bio.Seq import Seq
|
|
22
23
|
import pandas as pd
|
|
23
24
|
|
|
24
25
|
from mgnify_pipelines_toolkit.constants.var_region_coordinates import (
|
|
@@ -49,18 +50,26 @@ def parse_args():
|
|
|
49
50
|
help="Path to concatenated primers fasta file",
|
|
50
51
|
)
|
|
51
52
|
parser.add_argument("-s", "--sample", required=True, type=str, help="Sample ID")
|
|
53
|
+
parser.add_argument(
|
|
54
|
+
"--se",
|
|
55
|
+
action=argparse.BooleanOptionalAction,
|
|
56
|
+
help="Flag for if run is single-end",
|
|
57
|
+
)
|
|
52
58
|
args = parser.parse_args()
|
|
53
59
|
|
|
54
60
|
input = args.input
|
|
55
61
|
fasta = args.fasta
|
|
56
62
|
sample = args.sample
|
|
63
|
+
single_end = args.se
|
|
57
64
|
|
|
58
|
-
return input, fasta, sample
|
|
65
|
+
return input, fasta, sample, single_end
|
|
59
66
|
|
|
60
67
|
|
|
61
68
|
def get_amp_region(beg, end, strand, model):
|
|
62
69
|
prev_region = ""
|
|
63
70
|
|
|
71
|
+
margin = -10
|
|
72
|
+
|
|
64
73
|
for region, region_coords in model.items():
|
|
65
74
|
|
|
66
75
|
region_beg = region_coords[0]
|
|
@@ -68,10 +77,10 @@ def get_amp_region(beg, end, strand, model):
|
|
|
68
77
|
end_diff = region_beg - end
|
|
69
78
|
|
|
70
79
|
if strand == STRAND_FWD:
|
|
71
|
-
if beg_diff
|
|
80
|
+
if beg_diff >= margin and end_diff >= margin:
|
|
72
81
|
return region
|
|
73
82
|
else:
|
|
74
|
-
if beg_diff
|
|
83
|
+
if beg_diff >= margin and end_diff >= margin:
|
|
75
84
|
return prev_region
|
|
76
85
|
|
|
77
86
|
prev_region = region
|
|
@@ -80,10 +89,14 @@ def get_amp_region(beg, end, strand, model):
|
|
|
80
89
|
|
|
81
90
|
|
|
82
91
|
def main():
|
|
83
|
-
|
|
92
|
+
|
|
93
|
+
input, fasta, sample, single_end = parse_args()
|
|
84
94
|
res_dict = defaultdict(list)
|
|
85
95
|
fasta_dict = SeqIO.to_dict(SeqIO.parse(fasta, "fasta"))
|
|
86
96
|
|
|
97
|
+
fwd_primers_fw = open("./fwd_primers.fasta", "w")
|
|
98
|
+
rev_primers_fw = open("./rev_primers.fasta", "w")
|
|
99
|
+
|
|
87
100
|
with open(input, "r") as fr:
|
|
88
101
|
for line in fr:
|
|
89
102
|
line = line.strip()
|
|
@@ -104,8 +117,12 @@ def main():
|
|
|
104
117
|
elif rfam == "RF01960":
|
|
105
118
|
gene = "18S"
|
|
106
119
|
model = REGIONS_18S
|
|
107
|
-
else:
|
|
108
|
-
|
|
120
|
+
else: # For cases when it's a std primer but for some reason hasn't matched the model
|
|
121
|
+
if primer_name == "F_auto" or primer_name == "R_auto":
|
|
122
|
+
continue
|
|
123
|
+
gene = "Unknown"
|
|
124
|
+
amp_region = "Unknown"
|
|
125
|
+
model = ""
|
|
109
126
|
|
|
110
127
|
res_dict["Run"].append(sample)
|
|
111
128
|
res_dict["AssertionEvidence"].append("ECO_0000363")
|
|
@@ -113,12 +130,13 @@ def main():
|
|
|
113
130
|
|
|
114
131
|
strand = ""
|
|
115
132
|
|
|
116
|
-
if "
|
|
133
|
+
if primer_name == "F_auto" or primer_name[-1] == "F":
|
|
117
134
|
strand = STRAND_FWD
|
|
118
|
-
elif "
|
|
135
|
+
elif primer_name == "R_auto" or primer_name[-1] == "R":
|
|
119
136
|
strand = STRAND_REV
|
|
120
137
|
|
|
121
|
-
|
|
138
|
+
if model:
|
|
139
|
+
amp_region = get_amp_region(beg, end, strand, model)
|
|
122
140
|
primer_seq = str(fasta_dict[primer_name].seq)
|
|
123
141
|
|
|
124
142
|
res_dict["Gene"].append(gene)
|
|
@@ -127,9 +145,19 @@ def main():
|
|
|
127
145
|
res_dict["PrimerStrand"].append(strand)
|
|
128
146
|
res_dict["PrimerSeq"].append(primer_seq)
|
|
129
147
|
|
|
148
|
+
if strand == STRAND_FWD:
|
|
149
|
+
fwd_primers_fw.write(f">{primer_name}\n{primer_seq}\n")
|
|
150
|
+
elif strand == STRAND_REV:
|
|
151
|
+
if single_end:
|
|
152
|
+
primer_seq = Seq(primer_seq).reverse_complement()
|
|
153
|
+
rev_primers_fw.write(f">{primer_name}\n{primer_seq}\n")
|
|
154
|
+
|
|
130
155
|
res_df = pd.DataFrame.from_dict(res_dict)
|
|
131
156
|
res_df.to_csv(f"./{sample}_primer_validation.tsv", sep="\t", index=False)
|
|
132
157
|
|
|
158
|
+
fwd_primers_fw.close()
|
|
159
|
+
rev_primers_fw.close()
|
|
160
|
+
|
|
133
161
|
|
|
134
162
|
if __name__ == "__main__":
|
|
135
163
|
main()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|