mgnify-pipelines-toolkit 1.2.3__tar.gz → 1.2.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mgnify-pipelines-toolkit might be problematic. Click here for more details.
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/PKG-INFO +1 -1
- mgnify_pipelines_toolkit-1.2.5/mgnify_pipelines_toolkit/analysis/amplicon/permute_primers.py +87 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/amplicon/primer_val_classification.py +21 -7
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit.egg-info/PKG-INFO +1 -1
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit.egg-info/SOURCES.txt +1 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit.egg-info/entry_points.txt +1 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/pyproject.toml +2 -1
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/LICENSE +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/README.md +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/__init__.py +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/__init__.py +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/amplicon/classify_var_regions.py +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/amplicon/make_asv_count_table.py +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/amplicon/mapseq_to_asv_table.py +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/amplicon/remove_ambiguous_reads.py +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/amplicon/rev_comp_se_primers.py +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/amplicon/study_summary_generator.py +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/assembly/add_rhea_chebi_annotation.py +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/assembly/antismash_gff_builder.py +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/assembly/combined_gene_caller_merge.py +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/assembly/generate_gaf.py +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/assembly/gff_annotation_utils.py +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/assembly/gff_file_utils.py +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/assembly/gff_toolkit.py +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/assembly/go_utils.py +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/assembly/krona_txt_from_cat_classification.py +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/assembly/process_dbcan_result_cazys.py +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/assembly/process_dbcan_result_clusters.py +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/assembly/study_summary_generator.py +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/assembly/summarise_antismash_bgcs.py +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/assembly/summarise_goslims.py +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/assembly/summarise_sanntis_bgcs.py +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/genomes/__init__.py +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/shared/__init__.py +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/shared/convert_cmscan_to_cmsearch_tblout.py +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/shared/dwc_summary_generator.py +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/shared/fastq_suffix_header_check.py +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/shared/get_subunits.py +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/shared/get_subunits_coords.py +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/shared/library_strategy_check.py +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/shared/mapseq2biom.py +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/shared/markergene_study_summary.py +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/constants/db_labels.py +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/constants/ncrna.py +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/constants/regex_fasta_header.py +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/constants/tax_ranks.py +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/constants/thresholds.py +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/constants/var_region_coordinates.py +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/schemas/schemas.py +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/utils/__init__.py +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/utils/fasta_to_delimited.py +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/utils/get_mpt_version.py +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit.egg-info/dependency_links.txt +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit.egg-info/requires.txt +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit.egg-info/top_level.txt +0 -0
- {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/setup.cfg +0 -0
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
from itertools import product
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from Bio import SeqIO
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def parse_args(argv=None):
    """Parse the command-line options of the primer permutation script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads the process arguments, which is
            the original behaviour; passing a list lets the parser be driven
            programmatically.

    Returns:
        A ``(input_path, prefix)`` tuple holding the values given to
        ``-i/--input_primers`` and ``-p/--prefix``, both as strings.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-i",
        "--input_primers",
        required=True,
        type=str,
        help="Input primers to generate permutations for due to IUPAC ambiguous codes",
    )
    parser.add_argument("-p", "--prefix", required=True, type=str, help="Output prefix")

    # argv=None makes argparse fall back to sys.argv[1:].
    args = parser.parse_args(argv)

    return args.input_primers, args.prefix
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def permute_seq(seq):
    """Expand an IUPAC nucleotide sequence into every unambiguous sequence.

    Each ambiguity code (R, Y, S, W, K, M, B, D, H, V, N) is replaced by each
    of the concrete bases it stands for, and the cartesian product over all
    positions is returned. Bases are matched case-insensitively and the
    returned sequences are upper-case.

    Args:
        seq: Iterable of single-character base symbols (for example a ``str``).

    Returns:
        A list of strings, one per combination, in ``itertools.product``
        order. A sequence without ambiguity codes yields a single-element
        list; an empty sequence yields ``[""]``.

    Raises:
        KeyError: If a symbol is neither A/C/G/T nor a supported ambiguity
            code.
    """
    ambiguous_bases_dict = {
        "R": ["A", "G"],
        "Y": ["C", "T"],
        "S": ["G", "C"],
        "W": ["A", "T"],
        "K": ["G", "T"],
        "M": ["A", "C"],
        "B": ["C", "G", "T"],
        "D": ["A", "G", "T"],
        "H": ["A", "C", "T"],
        "V": ["A", "C", "G"],
        "N": ["A", "C", "T", "G"],
    }

    seq_template = []

    for base in seq:
        # Normalise case so soft-masked (lowercase) primers are expanded
        # instead of failing the lookup below.
        symbol = base.upper()
        if symbol in ("A", "C", "T", "G"):
            seq_template.append(symbol)
            continue
        try:
            seq_template.append(ambiguous_bases_dict[symbol])
        except KeyError:
            raise KeyError(
                f"Unsupported nucleotide code {base!r} in primer sequence"
            ) from None

    # A plain base is a one-character string, so product() treats it as a
    # single-option position alongside the multi-option ambiguity lists.
    return ["".join(combo) for combo in product(*seq_template)]
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def make_primer_permutations(primers_dict, prefix):
    """Write each primer and all of its unambiguous variants to a FASTA file.

    The output file is ``{prefix}_permuted_primers.fasta``. For every primer
    the original record is written first, followed by one record per
    sequence returned by ``permute_seq``, named
    ``{primer_name}_variant_{counter}_{strand}`` with ``counter`` starting
    at 1.

    Args:
        primers_dict: Mapping of primer name to an object exposing the primer
            sequence as a ``.seq`` attribute.
        prefix: Prefix (may include a directory) of the output file name.

    Raises:
        ValueError: If the strand of a primer cannot be derived from its name
            (it is not ``F_auto``/``R_auto`` and does not end in ``F`` or
            ``R``).
    """
    with open(f"{prefix}_permuted_primers.fasta", "w") as fw:
        for primer_name, seq in primers_dict.items():
            # Resolve the strand for this primer before writing anything, so
            # an unrecognised name can neither leave `strand` undefined nor
            # silently inherit the strand of the previous primer.
            if primer_name == "F_auto" or primer_name.endswith("F"):
                strand = "F"
            elif primer_name == "R_auto" or primer_name.endswith("R"):
                strand = "R"
            else:
                raise ValueError(
                    f"Cannot determine strand from primer name: {primer_name!r}"
                )

            primer_seq = seq.seq
            fw.write(f">{primer_name}\n{primer_seq}\n")

            seq_permutations = permute_seq(primer_seq)

            for counter, permuted_seq in enumerate(seq_permutations, 1):
                variant_name = f"{primer_name}_variant_{counter}_{strand}"
                fw.write(f">{variant_name}\n{permuted_seq}\n")
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def main():
    """Read the input primer FASTA and write its permuted-primer FASTA."""
    input_path, prefix = parse_args()

    # Load every FASTA record into a dict before generating the variants.
    records = SeqIO.parse(Path(input_path), "fasta")
    primers_dict = SeqIO.to_dict(records)

    make_primer_permutations(primers_dict, prefix)


if __name__ == "__main__":
    main()
|
|
@@ -97,6 +97,8 @@ def main():
|
|
|
97
97
|
fwd_primers_fw = open("./fwd_primers.fasta", "w")
|
|
98
98
|
rev_primers_fw = open("./rev_primers.fasta", "w")
|
|
99
99
|
|
|
100
|
+
matched_primers_list = []
|
|
101
|
+
|
|
100
102
|
with open(input, "r") as fr:
|
|
101
103
|
for line in fr:
|
|
102
104
|
line = line.strip()
|
|
@@ -108,6 +110,13 @@ def main():
|
|
|
108
110
|
beg = float(line_lst[5])
|
|
109
111
|
end = float(line_lst[6])
|
|
110
112
|
|
|
113
|
+
if "variant" not in primer_name:
|
|
114
|
+
continue
|
|
115
|
+
|
|
116
|
+
cleaned_primer_name = "_".join(primer_name.split("_")[0:-3])
|
|
117
|
+
if cleaned_primer_name in matched_primers_list:
|
|
118
|
+
continue
|
|
119
|
+
|
|
111
120
|
if rfam == "RF00177":
|
|
112
121
|
gene = "16S"
|
|
113
122
|
model = REGIONS_16S_BACTERIA
|
|
@@ -118,7 +127,7 @@ def main():
|
|
|
118
127
|
gene = "18S"
|
|
119
128
|
model = REGIONS_18S
|
|
120
129
|
else: # For cases when it's a std primer but for some reason hasn't matched the model
|
|
121
|
-
if
|
|
130
|
+
if cleaned_primer_name == "F_auto" or cleaned_primer_name == "R_auto":
|
|
122
131
|
continue
|
|
123
132
|
gene = "Unknown"
|
|
124
133
|
amp_region = "Unknown"
|
|
@@ -130,27 +139,32 @@ def main():
|
|
|
130
139
|
|
|
131
140
|
strand = ""
|
|
132
141
|
|
|
133
|
-
if primer_name
|
|
142
|
+
if primer_name[-1] == "F":
|
|
134
143
|
strand = STRAND_FWD
|
|
135
|
-
elif primer_name
|
|
144
|
+
elif primer_name[-1] == "R":
|
|
136
145
|
strand = STRAND_REV
|
|
146
|
+
else:
|
|
147
|
+
print(f"Not sure what strand this is, exiting: {primer_name}")
|
|
137
148
|
|
|
138
149
|
if model:
|
|
139
150
|
amp_region = get_amp_region(beg, end, strand, model)
|
|
140
|
-
|
|
151
|
+
|
|
152
|
+
primer_seq = str(fasta_dict[cleaned_primer_name].seq)
|
|
141
153
|
|
|
142
154
|
res_dict["Gene"].append(gene)
|
|
143
155
|
res_dict["VariableRegion"].append(amp_region)
|
|
144
|
-
res_dict["PrimerName"].append(
|
|
156
|
+
res_dict["PrimerName"].append(cleaned_primer_name)
|
|
145
157
|
res_dict["PrimerStrand"].append(strand)
|
|
146
158
|
res_dict["PrimerSeq"].append(primer_seq)
|
|
147
159
|
|
|
148
160
|
if strand == STRAND_FWD:
|
|
149
|
-
fwd_primers_fw.write(f">{
|
|
161
|
+
fwd_primers_fw.write(f">{cleaned_primer_name}\n{primer_seq}\n")
|
|
150
162
|
elif strand == STRAND_REV:
|
|
151
163
|
if single_end:
|
|
152
164
|
primer_seq = Seq(primer_seq).reverse_complement()
|
|
153
|
-
rev_primers_fw.write(f">{
|
|
165
|
+
rev_primers_fw.write(f">{cleaned_primer_name}\n{primer_seq}\n")
|
|
166
|
+
|
|
167
|
+
matched_primers_list.append(cleaned_primer_name)
|
|
154
168
|
|
|
155
169
|
res_df = pd.DataFrame.from_dict(res_dict)
|
|
156
170
|
res_df.to_csv(f"./{sample}_primer_validation.tsv", sep="\t", index=False)
|
|
@@ -12,6 +12,7 @@ mgnify_pipelines_toolkit/analysis/__init__.py
|
|
|
12
12
|
mgnify_pipelines_toolkit/analysis/amplicon/classify_var_regions.py
|
|
13
13
|
mgnify_pipelines_toolkit/analysis/amplicon/make_asv_count_table.py
|
|
14
14
|
mgnify_pipelines_toolkit/analysis/amplicon/mapseq_to_asv_table.py
|
|
15
|
+
mgnify_pipelines_toolkit/analysis/amplicon/permute_primers.py
|
|
15
16
|
mgnify_pipelines_toolkit/analysis/amplicon/primer_val_classification.py
|
|
16
17
|
mgnify_pipelines_toolkit/analysis/amplicon/remove_ambiguous_reads.py
|
|
17
18
|
mgnify_pipelines_toolkit/analysis/amplicon/rev_comp_se_primers.py
|
|
@@ -23,6 +23,7 @@ make_asv_count_table = mgnify_pipelines_toolkit.analysis.amplicon.make_asv_count
|
|
|
23
23
|
mapseq2biom = mgnify_pipelines_toolkit.analysis.shared.mapseq2biom:main
|
|
24
24
|
mapseq_to_asv_table = mgnify_pipelines_toolkit.analysis.amplicon.mapseq_to_asv_table:main
|
|
25
25
|
markergene_study_summary = mgnify_pipelines_toolkit.analysis.shared.markergene_study_summary:main
|
|
26
|
+
permute_primers = mgnify_pipelines_toolkit.analysis.amplicon.permute_primers:main
|
|
26
27
|
primer_val_classification = mgnify_pipelines_toolkit.analysis.amplicon.primer_val_classification:main
|
|
27
28
|
process_dbcan_cazys = mgnify_pipelines_toolkit.analysis.assembly.process_dbcan_result_cazys:main
|
|
28
29
|
process_dbcan_clusters = mgnify_pipelines_toolkit.analysis.assembly.process_dbcan_result_clusters:main
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "mgnify_pipelines_toolkit"
|
|
3
|
-
version = "1.2.3"
|
|
3
|
+
version = "1.2.5"
|
|
4
4
|
readme = "README.md"
|
|
5
5
|
license = { text = "Apache Software License 2.0" }
|
|
6
6
|
authors = [
|
|
@@ -58,6 +58,7 @@ make_asv_count_table = "mgnify_pipelines_toolkit.analysis.amplicon.make_asv_coun
|
|
|
58
58
|
remove_ambiguous_reads = "mgnify_pipelines_toolkit.analysis.amplicon.remove_ambiguous_reads:main"
|
|
59
59
|
rev_comp_se_primers = "mgnify_pipelines_toolkit.analysis.amplicon.rev_comp_se_primers:main"
|
|
60
60
|
mapseq_to_asv_table = "mgnify_pipelines_toolkit.analysis.amplicon.mapseq_to_asv_table:main"
|
|
61
|
+
permute_primers = "mgnify_pipelines_toolkit.analysis.amplicon.permute_primers:main"
|
|
61
62
|
primer_val_classification = "mgnify_pipelines_toolkit.analysis.amplicon.primer_val_classification:main"
|
|
62
63
|
amplicon_study_summary_generator = "mgnify_pipelines_toolkit.analysis.amplicon.study_summary_generator:cli"
|
|
63
64
|
# analysis.assembly #
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|