mgnify-pipelines-toolkit 1.2.3__py3-none-any.whl → 1.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mgnify-pipelines-toolkit might be problematic. Click here for more details.
- mgnify_pipelines_toolkit/analysis/amplicon/permute_primers.py +87 -0
- mgnify_pipelines_toolkit/analysis/amplicon/primer_val_classification.py +18 -7
- {mgnify_pipelines_toolkit-1.2.3.dist-info → mgnify_pipelines_toolkit-1.2.4.dist-info}/METADATA +1 -1
- {mgnify_pipelines_toolkit-1.2.3.dist-info → mgnify_pipelines_toolkit-1.2.4.dist-info}/RECORD +8 -7
- {mgnify_pipelines_toolkit-1.2.3.dist-info → mgnify_pipelines_toolkit-1.2.4.dist-info}/entry_points.txt +1 -0
- {mgnify_pipelines_toolkit-1.2.3.dist-info → mgnify_pipelines_toolkit-1.2.4.dist-info}/WHEEL +0 -0
- {mgnify_pipelines_toolkit-1.2.3.dist-info → mgnify_pipelines_toolkit-1.2.4.dist-info}/licenses/LICENSE +0 -0
- {mgnify_pipelines_toolkit-1.2.3.dist-info → mgnify_pipelines_toolkit-1.2.4.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
from itertools import product
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from Bio import SeqIO
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def parse_args():
    """Read the command-line options for the primer permutation script.

    Returns:
        A ``(input_path, prefix)`` tuple holding the values given to
        ``-i/--input_primers`` and ``-p/--prefix`` respectively.
    """
    cli = argparse.ArgumentParser()

    # Both options are mandatory; argparse exits with a usage error otherwise.
    cli.add_argument(
        "-i",
        "--input_primers",
        type=str,
        required=True,
        help="Input primers to generate permutations for due to IUPAC ambiguous codes",
    )
    cli.add_argument(
        "-p",
        "--prefix",
        type=str,
        required=True,
        help="Output prefix",
    )

    parsed = cli.parse_args()
    return parsed.input_primers, parsed.prefix
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# Concrete bases represented by each supported nucleotide symbol. Unambiguous
# bases map to themselves so that every position is handled by one lookup.
# The order inside each tuple determines the order of the generated sequences.
_IUPAC_EXPANSIONS = {
    "A": ("A",),
    "C": ("C",),
    "T": ("T",),
    "G": ("G",),
    "R": ("A", "G"),
    "Y": ("C", "T"),
    "S": ("G", "C"),
    "W": ("A", "T"),
    "K": ("G", "T"),
    "M": ("A", "C"),
    "B": ("C", "G", "T"),
    "D": ("A", "G", "T"),
    "H": ("A", "C", "T"),
    "V": ("A", "C", "G"),
    "N": ("A", "C", "T", "G"),
}


def permute_seq(seq):
    """Expand a primer containing IUPAC ambiguity codes into concrete sequences.

    Every position is replaced by each base its symbol can stand for, and the
    cartesian product over all positions is returned. The number of results is
    the product of the per-position degeneracies, so it grows quickly with the
    number of ambiguous positions.

    Args:
        seq: An iterable of single-character nucleotide symbols (for example a
            ``str``). Symbols are matched case-insensitively.

    Returns:
        A list of upper-case strings, one per combination. A sequence without
        ambiguity codes yields a single-element list; an empty sequence yields
        ``[""]``.

    Raises:
        ValueError: If a symbol is not A/C/G/T or one of the ambiguity codes
            R, Y, S, W, K, M, B, D, H, V, N.
    """
    choices_per_position = []

    for position, base in enumerate(seq, 1):
        try:
            # Upper-casing lets lower-case (soft-masked) primers be expanded
            # instead of failing on the table lookup.
            choices_per_position.append(_IUPAC_EXPANSIONS[base.upper()])
        except KeyError:
            raise ValueError(
                f"Unsupported nucleotide symbol {base!r} at position {position}"
            ) from None

    return ["".join(combo) for combo in product(*choices_per_position)]
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _primer_strand(primer_name):
    """Return "F" or "R" for a primer name, raising ValueError if it encodes neither."""
    if primer_name == "F_auto" or primer_name.endswith("F"):
        return "F"
    if primer_name == "R_auto" or primer_name.endswith("R"):
        return "R"
    raise ValueError(
        f"Cannot determine strand of primer {primer_name!r}: "
        "expected 'F_auto', 'R_auto' or a name ending in 'F' or 'R'"
    )


def make_primer_permutations(primers_dict, prefix):
    """Write each primer and all of its unambiguous variants to a FASTA file.

    The output is ``{prefix}_permuted_primers.fasta``. For every primer the
    original record is written first, followed by one record per sequence
    returned by ``permute_seq``, named
    ``{primer_name}_variant_{counter}_{strand}`` with ``counter`` starting at 1.

    Args:
        primers_dict: Mapping of primer name to an object exposing the primer
            sequence as its ``.seq`` attribute.
        prefix: Prefix (optionally including a directory) of the output file.

    Raises:
        ValueError: If a primer name is neither ``F_auto``/``R_auto`` nor ends
            in ``F`` or ``R``, so its strand cannot be determined.
    """
    # Resolve every strand before opening the output. Previously an
    # unrecognised name either crashed midway with an unbound variable or
    # silently reused the strand of the preceding primer.
    strands = {name: _primer_strand(name) for name in primers_dict}

    with open(f"{prefix}_permuted_primers.fasta", "w") as fw:
        for primer_name, record in primers_dict.items():
            primer_seq = record.seq
            fw.write(f">{primer_name}\n{primer_seq}\n")

            strand = strands[primer_name]
            for counter, permuted_seq in enumerate(permute_seq(primer_seq), 1):
                variant_name = f"{primer_name}_variant_{counter}_{strand}"
                fw.write(f">{variant_name}\n{permuted_seq}\n")
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def main():
    """Command-line entry point: load the primers FASTA and write its permutations."""
    input_path, prefix = parse_args()

    # Parse the FASTA records, then let Biopython build the dict that is
    # handed to the writer.
    records = SeqIO.parse(Path(input_path), "fasta")
    primers_dict = SeqIO.to_dict(records)

    make_primer_permutations(primers_dict, prefix)


if __name__ == "__main__":
    main()
|
|
@@ -97,6 +97,8 @@ def main():
|
|
|
97
97
|
fwd_primers_fw = open("./fwd_primers.fasta", "w")
|
|
98
98
|
rev_primers_fw = open("./rev_primers.fasta", "w")
|
|
99
99
|
|
|
100
|
+
matched_primers_list = []
|
|
101
|
+
|
|
100
102
|
with open(input, "r") as fr:
|
|
101
103
|
for line in fr:
|
|
102
104
|
line = line.strip()
|
|
@@ -108,6 +110,10 @@ def main():
|
|
|
108
110
|
beg = float(line_lst[5])
|
|
109
111
|
end = float(line_lst[6])
|
|
110
112
|
|
|
113
|
+
cleaned_primer_name = "_".join(primer_name.split("_")[0:-3])
|
|
114
|
+
if cleaned_primer_name in matched_primers_list:
|
|
115
|
+
continue
|
|
116
|
+
|
|
111
117
|
if rfam == "RF00177":
|
|
112
118
|
gene = "16S"
|
|
113
119
|
model = REGIONS_16S_BACTERIA
|
|
@@ -118,7 +124,7 @@ def main():
|
|
|
118
124
|
gene = "18S"
|
|
119
125
|
model = REGIONS_18S
|
|
120
126
|
else: # For cases when it's a std primer but for some reason hasn't matched the model
|
|
121
|
-
if
|
|
127
|
+
if cleaned_primer_name == "F_auto" or cleaned_primer_name == "R_auto":
|
|
122
128
|
continue
|
|
123
129
|
gene = "Unknown"
|
|
124
130
|
amp_region = "Unknown"
|
|
@@ -130,27 +136,32 @@ def main():
|
|
|
130
136
|
|
|
131
137
|
strand = ""
|
|
132
138
|
|
|
133
|
-
if primer_name
|
|
139
|
+
if primer_name[-1] == "F":
|
|
134
140
|
strand = STRAND_FWD
|
|
135
|
-
elif primer_name
|
|
141
|
+
elif primer_name[-1] == "R":
|
|
136
142
|
strand = STRAND_REV
|
|
143
|
+
else:
|
|
144
|
+
print(f"Not sure what strand this is, exiting: {primer_name}")
|
|
137
145
|
|
|
138
146
|
if model:
|
|
139
147
|
amp_region = get_amp_region(beg, end, strand, model)
|
|
140
|
-
|
|
148
|
+
|
|
149
|
+
primer_seq = str(fasta_dict[cleaned_primer_name].seq)
|
|
141
150
|
|
|
142
151
|
res_dict["Gene"].append(gene)
|
|
143
152
|
res_dict["VariableRegion"].append(amp_region)
|
|
144
|
-
res_dict["PrimerName"].append(
|
|
153
|
+
res_dict["PrimerName"].append(cleaned_primer_name)
|
|
145
154
|
res_dict["PrimerStrand"].append(strand)
|
|
146
155
|
res_dict["PrimerSeq"].append(primer_seq)
|
|
147
156
|
|
|
148
157
|
if strand == STRAND_FWD:
|
|
149
|
-
fwd_primers_fw.write(f">{
|
|
158
|
+
fwd_primers_fw.write(f">{cleaned_primer_name}\n{primer_seq}\n")
|
|
150
159
|
elif strand == STRAND_REV:
|
|
151
160
|
if single_end:
|
|
152
161
|
primer_seq = Seq(primer_seq).reverse_complement()
|
|
153
|
-
rev_primers_fw.write(f">{
|
|
162
|
+
rev_primers_fw.write(f">{cleaned_primer_name}\n{primer_seq}\n")
|
|
163
|
+
|
|
164
|
+
matched_primers_list.append(cleaned_primer_name)
|
|
154
165
|
|
|
155
166
|
res_df = pd.DataFrame.from_dict(res_dict)
|
|
156
167
|
res_df.to_csv(f"./{sample}_primer_validation.tsv", sep="\t", index=False)
|
{mgnify_pipelines_toolkit-1.2.3.dist-info → mgnify_pipelines_toolkit-1.2.4.dist-info}/RECORD
RENAMED
|
@@ -3,7 +3,8 @@ mgnify_pipelines_toolkit/analysis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeR
|
|
|
3
3
|
mgnify_pipelines_toolkit/analysis/amplicon/classify_var_regions.py,sha256=nUvboLz08RyqRE8Thfh8NRlmgJk0kVdXcSvgmAfKip0,20649
|
|
4
4
|
mgnify_pipelines_toolkit/analysis/amplicon/make_asv_count_table.py,sha256=ohguvrMSg7GuiiZ5aHj1DvCnfThKFUG4s13LUSMM0mo,8892
|
|
5
5
|
mgnify_pipelines_toolkit/analysis/amplicon/mapseq_to_asv_table.py,sha256=BLqhflblUegCvuQic16PrFXfIXlFWmGkmWJyl4wJoLQ,5040
|
|
6
|
-
mgnify_pipelines_toolkit/analysis/amplicon/
|
|
6
|
+
mgnify_pipelines_toolkit/analysis/amplicon/permute_primers.py,sha256=1aGOJX9tC7M1rnd0U2PeJ681sUo02wxk7_ycJqeVt6s,2216
|
|
7
|
+
mgnify_pipelines_toolkit/analysis/amplicon/primer_val_classification.py,sha256=I9JfkM_o6Wp4VINOMO6ff9mHqghdJw1kDIfiF37JtLo,5185
|
|
7
8
|
mgnify_pipelines_toolkit/analysis/amplicon/remove_ambiguous_reads.py,sha256=Wu4tRtuRkgd3hoeuwPl_E5ghxIW7e_1vrcvFGWv_U4A,3173
|
|
8
9
|
mgnify_pipelines_toolkit/analysis/amplicon/rev_comp_se_primers.py,sha256=yLpzkRJXAeXRUNgz60zopEwHcdprM2UDjquE-GkrFys,1722
|
|
9
10
|
mgnify_pipelines_toolkit/analysis/amplicon/study_summary_generator.py,sha256=epVClL10QcllL8yu7YGjx0rXNVHL2GxHi-Ek0MOjsjo,13859
|
|
@@ -42,9 +43,9 @@ mgnify_pipelines_toolkit/schemas/schemas.py,sha256=pyDZvCuWbwccQF0D7c5BN1vv36wQd
|
|
|
42
43
|
mgnify_pipelines_toolkit/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
43
44
|
mgnify_pipelines_toolkit/utils/fasta_to_delimited.py,sha256=lgYIR1S4crURY7C7nFtgE6QMV4u4zCNsUrVkcRnsEEo,3996
|
|
44
45
|
mgnify_pipelines_toolkit/utils/get_mpt_version.py,sha256=aS9bWrC9CP7tpxoEVg6eEYt18-pmjG7fJl5Mchz4YOU,798
|
|
45
|
-
mgnify_pipelines_toolkit-1.2.
|
|
46
|
-
mgnify_pipelines_toolkit-1.2.
|
|
47
|
-
mgnify_pipelines_toolkit-1.2.
|
|
48
|
-
mgnify_pipelines_toolkit-1.2.
|
|
49
|
-
mgnify_pipelines_toolkit-1.2.
|
|
50
|
-
mgnify_pipelines_toolkit-1.2.
|
|
46
|
+
mgnify_pipelines_toolkit-1.2.4.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
47
|
+
mgnify_pipelines_toolkit-1.2.4.dist-info/METADATA,sha256=UXCHFcEcjuPMZvUgtzITSY_iIG-j_nfGVBMGCWjBjjA,5775
|
|
48
|
+
mgnify_pipelines_toolkit-1.2.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
49
|
+
mgnify_pipelines_toolkit-1.2.4.dist-info/entry_points.txt,sha256=sHDxlHizt_iZPtkNp0EDuohDGvC4O12B57JtpUmHwYk,3123
|
|
50
|
+
mgnify_pipelines_toolkit-1.2.4.dist-info/top_level.txt,sha256=xA_wC7C01V3VwuDnqwRM2QYeJJ45WtvF6LVav4tYxuE,25
|
|
51
|
+
mgnify_pipelines_toolkit-1.2.4.dist-info/RECORD,,
|
|
@@ -23,6 +23,7 @@ make_asv_count_table = mgnify_pipelines_toolkit.analysis.amplicon.make_asv_count
|
|
|
23
23
|
mapseq2biom = mgnify_pipelines_toolkit.analysis.shared.mapseq2biom:main
|
|
24
24
|
mapseq_to_asv_table = mgnify_pipelines_toolkit.analysis.amplicon.mapseq_to_asv_table:main
|
|
25
25
|
markergene_study_summary = mgnify_pipelines_toolkit.analysis.shared.markergene_study_summary:main
|
|
26
|
+
permute_primers = mgnify_pipelines_toolkit.analysis.amplicon.permute_primers:main
|
|
26
27
|
primer_val_classification = mgnify_pipelines_toolkit.analysis.amplicon.primer_val_classification:main
|
|
27
28
|
process_dbcan_cazys = mgnify_pipelines_toolkit.analysis.assembly.process_dbcan_result_cazys:main
|
|
28
29
|
process_dbcan_clusters = mgnify_pipelines_toolkit.analysis.assembly.process_dbcan_result_clusters:main
|
|
File without changes
|
|
File without changes
|
{mgnify_pipelines_toolkit-1.2.3.dist-info → mgnify_pipelines_toolkit-1.2.4.dist-info}/top_level.txt
RENAMED
|
File without changes
|