mgnify-pipelines-toolkit 1.2.3__tar.gz → 1.2.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mgnify-pipelines-toolkit might be problematic. Click here for more details.

Files changed (56) hide show
  1. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/PKG-INFO +1 -1
  2. mgnify_pipelines_toolkit-1.2.5/mgnify_pipelines_toolkit/analysis/amplicon/permute_primers.py +87 -0
  3. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/amplicon/primer_val_classification.py +21 -7
  4. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit.egg-info/PKG-INFO +1 -1
  5. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit.egg-info/SOURCES.txt +1 -0
  6. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit.egg-info/entry_points.txt +1 -0
  7. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/pyproject.toml +2 -1
  8. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/LICENSE +0 -0
  9. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/README.md +0 -0
  10. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/__init__.py +0 -0
  11. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/__init__.py +0 -0
  12. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/amplicon/classify_var_regions.py +0 -0
  13. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/amplicon/make_asv_count_table.py +0 -0
  14. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/amplicon/mapseq_to_asv_table.py +0 -0
  15. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/amplicon/remove_ambiguous_reads.py +0 -0
  16. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/amplicon/rev_comp_se_primers.py +0 -0
  17. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/amplicon/study_summary_generator.py +0 -0
  18. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/assembly/add_rhea_chebi_annotation.py +0 -0
  19. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/assembly/antismash_gff_builder.py +0 -0
  20. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/assembly/combined_gene_caller_merge.py +0 -0
  21. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/assembly/generate_gaf.py +0 -0
  22. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/assembly/gff_annotation_utils.py +0 -0
  23. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/assembly/gff_file_utils.py +0 -0
  24. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/assembly/gff_toolkit.py +0 -0
  25. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/assembly/go_utils.py +0 -0
  26. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/assembly/krona_txt_from_cat_classification.py +0 -0
  27. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/assembly/process_dbcan_result_cazys.py +0 -0
  28. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/assembly/process_dbcan_result_clusters.py +0 -0
  29. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/assembly/study_summary_generator.py +0 -0
  30. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/assembly/summarise_antismash_bgcs.py +0 -0
  31. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/assembly/summarise_goslims.py +0 -0
  32. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/assembly/summarise_sanntis_bgcs.py +0 -0
  33. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/genomes/__init__.py +0 -0
  34. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/shared/__init__.py +0 -0
  35. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/shared/convert_cmscan_to_cmsearch_tblout.py +0 -0
  36. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/shared/dwc_summary_generator.py +0 -0
  37. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/shared/fastq_suffix_header_check.py +0 -0
  38. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/shared/get_subunits.py +0 -0
  39. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/shared/get_subunits_coords.py +0 -0
  40. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/shared/library_strategy_check.py +0 -0
  41. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/shared/mapseq2biom.py +0 -0
  42. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/analysis/shared/markergene_study_summary.py +0 -0
  43. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/constants/db_labels.py +0 -0
  44. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/constants/ncrna.py +0 -0
  45. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/constants/regex_fasta_header.py +0 -0
  46. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/constants/tax_ranks.py +0 -0
  47. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/constants/thresholds.py +0 -0
  48. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/constants/var_region_coordinates.py +0 -0
  49. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/schemas/schemas.py +0 -0
  50. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/utils/__init__.py +0 -0
  51. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/utils/fasta_to_delimited.py +0 -0
  52. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit/utils/get_mpt_version.py +0 -0
  53. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit.egg-info/dependency_links.txt +0 -0
  54. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit.egg-info/requires.txt +0 -0
  55. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/mgnify_pipelines_toolkit.egg-info/top_level.txt +0 -0
  56. {mgnify_pipelines_toolkit-1.2.3 → mgnify_pipelines_toolkit-1.2.5}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mgnify_pipelines_toolkit
3
- Version: 1.2.3
3
+ Version: 1.2.5
4
4
  Summary: Collection of scripts and tools for MGnify pipelines
5
5
  Author-email: MGnify team <metagenomics-help@ebi.ac.uk>
6
6
  License: Apache Software License 2.0
@@ -0,0 +1,87 @@
1
+ import argparse
2
+ from itertools import product
3
+ from pathlib import Path
4
+
5
+ from Bio import SeqIO
6
+
7
+
8
def parse_args(argv=None):
    """Parse the command-line options of the primer permutation script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, so
            existing callers that pass nothing behave exactly as before.

    Returns:
        A ``(input_path, prefix)`` tuple of strings: the value given to
        ``-i/--input_primers`` and the value given to ``-p/--prefix``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-i",
        "--input_primers",
        required=True,
        type=str,
        help="Input primers to generate permutations for due to IUPAC ambiguous codes",
    )
    parser.add_argument("-p", "--prefix", required=True, type=str, help="Output prefix")

    # Passing argv through lets other code (and tests) drive the parser
    # without having to patch sys.argv.
    args = parser.parse_args(argv)

    return args.input_primers, args.prefix
26
+
27
+
28
def permute_seq(seq):
    """Expand a sequence with IUPAC ambiguity codes into concrete sequences.

    Each ambiguous code is replaced by every base it stands for, and the
    cartesian product over all positions is returned. A sequence with no
    ambiguous codes yields a single-element list containing that sequence.

    Args:
        seq: The primer sequence; anything whose ``str()`` is the base
            string works. Case is ignored: bases are upper-cased before
            expansion.

    Returns:
        A list of upper-case strings, one per combination. The order follows
        ``itertools.product`` over the per-position options listed below, so
        the numbering of variants derived from it is deterministic.

    Raises:
        ValueError: If the sequence contains a character that is neither
            A/C/G/T nor one of the ambiguity codes handled here.
    """
    # The order of the options matters: it fixes the order of the output.
    base_options = {
        "A": ("A",),
        "C": ("C",),
        "G": ("G",),
        "T": ("T",),
        "R": ("A", "G"),
        "Y": ("C", "T"),
        "S": ("G", "C"),
        "W": ("A", "T"),
        "K": ("G", "T"),
        "M": ("A", "C"),
        "B": ("C", "G", "T"),
        "D": ("A", "G", "T"),
        "H": ("A", "C", "T"),
        "V": ("A", "C", "G"),
        "N": ("A", "C", "T", "G"),
    }

    # Normalise case so soft-masked / lower-case primers are expanded instead
    # of failing on the lookup.
    bases = str(seq).upper()

    seq_template = []
    for position, base in enumerate(bases, 1):
        if base not in base_options:
            raise ValueError(
                f"Unsupported base {base!r} at position {position} in primer sequence {bases!r}"
            )
        seq_template.append(base_options[base])

    return ["".join(combo) for combo in product(*seq_template)]
57
+
58
+
59
def _primer_strand(primer_name):
    """Return "F" or "R" for a primer name, raising ValueError if neither fits."""
    if primer_name == "F_auto" or primer_name.endswith("F"):
        return "F"
    if primer_name == "R_auto" or primer_name.endswith("R"):
        return "R"
    raise ValueError(
        f"Cannot determine strand for primer {primer_name!r}: "
        "expected 'F_auto', 'R_auto', or a name ending in 'F' or 'R'"
    )


def make_primer_permutations(primers_dict, prefix):
    """Write each primer and all of its unambiguous variants to a FASTA file.

    The output file is ``{prefix}_permuted_primers.fasta``. For every primer
    the original record is written first, followed by one record per
    permutation named ``{primer_name}_variant_{n}_{strand}`` with ``n``
    starting at 1.

    Args:
        primers_dict: Mapping of primer name to an object exposing the primer
            sequence as its ``.seq`` attribute.
        prefix: Prefix for the output file name.

    Raises:
        ValueError: If a primer name does not identify a strand. Previously
            such a name either crashed with ``UnboundLocalError`` or silently
            inherited the strand of the preceding primer.
    """
    # Resolve every strand before opening the output so a bad name fails
    # fast and does not leave a partially written FASTA behind.
    strands = {primer_name: _primer_strand(primer_name) for primer_name in primers_dict}

    with open(f"{prefix}_permuted_primers.fasta", "w") as fw:
        for primer_name, record in primers_dict.items():
            primer_seq = record.seq
            fw.write(f">{primer_name}\n{primer_seq}\n")

            strand = strands[primer_name]
            for counter, permuted_seq in enumerate(permute_seq(primer_seq), 1):
                variant_name = f"{primer_name}_variant_{counter}_{strand}"
                fw.write(f">{variant_name}\n{permuted_seq}\n")
77
+
78
+
79
def main():
    """Script entry point: read the primer FASTA and write its permutations.

    Takes the input path and output prefix from ``parse_args()``, parses the
    input as FASTA, converts the records to a dict with ``SeqIO.to_dict`` and
    passes that dict and the prefix to ``make_primer_permutations``.
    """
    input_path, prefix = parse_args()
    fasta_records = SeqIO.parse(Path(input_path), "fasta")
    primers_dict = SeqIO.to_dict(fasta_records)
    make_primer_permutations(primers_dict, prefix)


if __name__ == "__main__":
    main()
@@ -97,6 +97,8 @@ def main():
97
97
  fwd_primers_fw = open("./fwd_primers.fasta", "w")
98
98
  rev_primers_fw = open("./rev_primers.fasta", "w")
99
99
 
100
+ matched_primers_list = []
101
+
100
102
  with open(input, "r") as fr:
101
103
  for line in fr:
102
104
  line = line.strip()
@@ -108,6 +110,13 @@ def main():
108
110
  beg = float(line_lst[5])
109
111
  end = float(line_lst[6])
110
112
 
113
+ if "variant" not in primer_name:
114
+ continue
115
+
116
+ cleaned_primer_name = "_".join(primer_name.split("_")[0:-3])
117
+ if cleaned_primer_name in matched_primers_list:
118
+ continue
119
+
111
120
  if rfam == "RF00177":
112
121
  gene = "16S"
113
122
  model = REGIONS_16S_BACTERIA
@@ -118,7 +127,7 @@ def main():
118
127
  gene = "18S"
119
128
  model = REGIONS_18S
120
129
  else: # For cases when it's a std primer but for some reason hasn't matched the model
121
- if primer_name == "F_auto" or primer_name == "R_auto":
130
+ if cleaned_primer_name == "F_auto" or cleaned_primer_name == "R_auto":
122
131
  continue
123
132
  gene = "Unknown"
124
133
  amp_region = "Unknown"
@@ -130,27 +139,32 @@ def main():
130
139
 
131
140
  strand = ""
132
141
 
133
- if primer_name == "F_auto" or primer_name[-1] == "F":
142
+ if primer_name[-1] == "F":
134
143
  strand = STRAND_FWD
135
- elif primer_name == "R_auto" or primer_name[-1] == "R":
144
+ elif primer_name[-1] == "R":
136
145
  strand = STRAND_REV
146
+ else:
147
+ print(f"Not sure what strand this is, exiting: {primer_name}")
137
148
 
138
149
  if model:
139
150
  amp_region = get_amp_region(beg, end, strand, model)
140
- primer_seq = str(fasta_dict[primer_name].seq)
151
+
152
+ primer_seq = str(fasta_dict[cleaned_primer_name].seq)
141
153
 
142
154
  res_dict["Gene"].append(gene)
143
155
  res_dict["VariableRegion"].append(amp_region)
144
- res_dict["PrimerName"].append(primer_name)
156
+ res_dict["PrimerName"].append(cleaned_primer_name)
145
157
  res_dict["PrimerStrand"].append(strand)
146
158
  res_dict["PrimerSeq"].append(primer_seq)
147
159
 
148
160
  if strand == STRAND_FWD:
149
- fwd_primers_fw.write(f">{primer_name}\n{primer_seq}\n")
161
+ fwd_primers_fw.write(f">{cleaned_primer_name}\n{primer_seq}\n")
150
162
  elif strand == STRAND_REV:
151
163
  if single_end:
152
164
  primer_seq = Seq(primer_seq).reverse_complement()
153
- rev_primers_fw.write(f">{primer_name}\n{primer_seq}\n")
165
+ rev_primers_fw.write(f">{cleaned_primer_name}\n{primer_seq}\n")
166
+
167
+ matched_primers_list.append(cleaned_primer_name)
154
168
 
155
169
  res_df = pd.DataFrame.from_dict(res_dict)
156
170
  res_df.to_csv(f"./{sample}_primer_validation.tsv", sep="\t", index=False)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mgnify_pipelines_toolkit
3
- Version: 1.2.3
3
+ Version: 1.2.5
4
4
  Summary: Collection of scripts and tools for MGnify pipelines
5
5
  Author-email: MGnify team <metagenomics-help@ebi.ac.uk>
6
6
  License: Apache Software License 2.0
@@ -12,6 +12,7 @@ mgnify_pipelines_toolkit/analysis/__init__.py
12
12
  mgnify_pipelines_toolkit/analysis/amplicon/classify_var_regions.py
13
13
  mgnify_pipelines_toolkit/analysis/amplicon/make_asv_count_table.py
14
14
  mgnify_pipelines_toolkit/analysis/amplicon/mapseq_to_asv_table.py
15
+ mgnify_pipelines_toolkit/analysis/amplicon/permute_primers.py
15
16
  mgnify_pipelines_toolkit/analysis/amplicon/primer_val_classification.py
16
17
  mgnify_pipelines_toolkit/analysis/amplicon/remove_ambiguous_reads.py
17
18
  mgnify_pipelines_toolkit/analysis/amplicon/rev_comp_se_primers.py
@@ -23,6 +23,7 @@ make_asv_count_table = mgnify_pipelines_toolkit.analysis.amplicon.make_asv_count
23
23
  mapseq2biom = mgnify_pipelines_toolkit.analysis.shared.mapseq2biom:main
24
24
  mapseq_to_asv_table = mgnify_pipelines_toolkit.analysis.amplicon.mapseq_to_asv_table:main
25
25
  markergene_study_summary = mgnify_pipelines_toolkit.analysis.shared.markergene_study_summary:main
26
+ permute_primers = mgnify_pipelines_toolkit.analysis.amplicon.permute_primers:main
26
27
  primer_val_classification = mgnify_pipelines_toolkit.analysis.amplicon.primer_val_classification:main
27
28
  process_dbcan_cazys = mgnify_pipelines_toolkit.analysis.assembly.process_dbcan_result_cazys:main
28
29
  process_dbcan_clusters = mgnify_pipelines_toolkit.analysis.assembly.process_dbcan_result_clusters:main
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "mgnify_pipelines_toolkit"
3
- version = "1.2.3"
3
+ version = "1.2.5"
4
4
  readme = "README.md"
5
5
  license = { text = "Apache Software License 2.0" }
6
6
  authors = [
@@ -58,6 +58,7 @@ make_asv_count_table = "mgnify_pipelines_toolkit.analysis.amplicon.make_asv_coun
58
58
  remove_ambiguous_reads = "mgnify_pipelines_toolkit.analysis.amplicon.remove_ambiguous_reads:main"
59
59
  rev_comp_se_primers = "mgnify_pipelines_toolkit.analysis.amplicon.rev_comp_se_primers:main"
60
60
  mapseq_to_asv_table = "mgnify_pipelines_toolkit.analysis.amplicon.mapseq_to_asv_table:main"
61
+ permute_primers = "mgnify_pipelines_toolkit.analysis.amplicon.permute_primers:main"
61
62
  primer_val_classification = "mgnify_pipelines_toolkit.analysis.amplicon.primer_val_classification:main"
62
63
  amplicon_study_summary_generator = "mgnify_pipelines_toolkit.analysis.amplicon.study_summary_generator:cli"
63
64
  # analysis.assembly #