mgnify-pipelines-toolkit 1.0.3__py3-none-any.whl → 1.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mgnify-pipelines-toolkit might be problematic. Click here for more details.

@@ -78,7 +78,11 @@ def main():
78
78
  "--output",
79
79
  required=True,
80
80
  type=Path,
81
- help="Output TSV file with columns: contig_id, protein_id, UniRef90 cluster, rhea_ids, CHEBI reaction participants",
81
+ help=(
82
+ "Output TSV file with columns: contig_id, protein_id, protein hash, "
83
+ "Rhea IDs, CHEBI reaction, reaction definition, 'top hit' if it is "
84
+ "the first hit for the protein"
85
+ ),
82
86
  )
83
87
  parser.add_argument(
84
88
  "-p",
@@ -40,10 +40,12 @@ def import_nodes(nodes_dmp):
40
40
  taxid2rank = {}
41
41
 
42
42
  with open(nodes_dmp) as f1:
43
- reader = csv.reader(f1, delimiter="\t")
44
- for line in reader:
45
- taxid = line[0]
46
- rank = line[4]
43
+ for line in f1:
44
+ fields = [part.strip() for part in line.split("|")]
45
+ if len(fields) != 14:
46
+ raise ValueError(f"Unexpected number of columns in line: {line}")
47
+ taxid = fields[0]
48
+ rank = fields[2]
47
49
  taxid2rank[taxid] = rank
48
50
 
49
51
  return taxid2rank
@@ -54,11 +56,13 @@ def import_names(names_dmp):
54
56
  taxid2name = {}
55
57
 
56
58
  with open(names_dmp, newline="") as f1:
57
- reader = csv.reader(f1, delimiter="\t")
58
- for line in reader:
59
- if line[6] == "scientific name":
60
- taxid = line[0]
61
- name = line[2]
59
+ for line in f1:
60
+ fields = [part.strip() for part in line.split("|")]
61
+ if len(fields) != 5:
62
+ raise ValueError(f"Unexpected number of columns in line: {line}")
63
+ if fields[3] == "scientific name":
64
+ taxid = fields[0]
65
+ name = fields[1]
62
66
  taxid2name[taxid] = name
63
67
 
64
68
  return taxid2name
@@ -108,7 +108,7 @@ def main():
108
108
 
109
109
  open_files = {}
110
110
  for record in SeqIO.parse(args.input, "fasta"):
111
- model = "-".join(record.id.split("/")[0].split("-")[-1:])
111
+ model = "-".join("/".join(record.id.split("/")[:-1]).split("-")[-1:])
112
112
  if model in SSU_MODELS:
113
113
  if SSU not in open_files:
114
114
  file_out = open(pattern_dict[SSU], "w")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mgnify_pipelines_toolkit
3
- Version: 1.0.3
3
+ Version: 1.0.4
4
4
  Summary: Collection of scripts and tools for MGnify pipelines
5
5
  Author-email: MGnify team <metagenomics-help@ebi.ac.uk>
6
6
  License: Apache Software License 2.0
@@ -11,33 +11,24 @@ Classifier: Operating System :: OS Independent
11
11
  Requires-Python: >=3.9
12
12
  Description-Content-Type: text/markdown
13
13
  License-File: LICENSE
14
- Requires-Dist: biopython==1.82
15
- Requires-Dist: numpy==1.26.0
16
- Requires-Dist: pandas==2.0.2
17
- Requires-Dist: regex==2023.12.25
18
- Requires-Dist: requests==2.32.3
19
- Requires-Dist: click==8.1.7
20
- Requires-Dist: pandera==0.22.1
21
- Requires-Dist: pyfastx>=2.2.0
22
- Requires-Dist: intervaltree==3.1.0
14
+ Requires-Dist: biopython>=1.85
15
+ Requires-Dist: numpy<3,>=2.2.4
16
+ Requires-Dist: pandas<3,>=2.2.3
17
+ Requires-Dist: regex>=2024.11.6
18
+ Requires-Dist: requests<3,>=2.32.3
19
+ Requires-Dist: click<9,>=8.1.8
20
+ Requires-Dist: pandera<0.24,>=0.23.1
21
+ Requires-Dist: pyfastx<3,>=2.2.0
22
+ Requires-Dist: intervaltree<4,>=3.1.0
23
23
  Provides-Extra: tests
24
- Requires-Dist: pytest==7.4.0; extra == "tests"
25
- Requires-Dist: pytest-md==0.2.0; extra == "tests"
26
- Requires-Dist: pytest-workflow==2.0.1; extra == "tests"
27
- Requires-Dist: biopython==1.82; extra == "tests"
28
- Requires-Dist: pandas==2.0.2; extra == "tests"
29
- Requires-Dist: numpy==1.26.0; extra == "tests"
30
- Requires-Dist: regex==2023.12.25; extra == "tests"
31
- Requires-Dist: requests==2.32.3; extra == "tests"
32
- Requires-Dist: click==8.1.7; extra == "tests"
33
- Requires-Dist: pandera==0.22.1; extra == "tests"
34
- Requires-Dist: pyfastx>=2.2.0; extra == "tests"
24
+ Requires-Dist: pytest<9,>=8.3.5; extra == "tests"
25
+ Requires-Dist: pytest-md>=0.2.0; extra == "tests"
26
+ Requires-Dist: pytest-workflow==2.1.0; extra == "tests"
35
27
  Provides-Extra: dev
36
- Requires-Dist: mgnify_pipelines_toolkit[tests]; extra == "dev"
37
- Requires-Dist: pre-commit==3.8.0; extra == "dev"
38
- Requires-Dist: black==24.8.0; extra == "dev"
39
- Requires-Dist: flake8==7.1.1; extra == "dev"
40
- Requires-Dist: pep8-naming==0.14.1; extra == "dev"
28
+ Requires-Dist: pre-commit>=4.2.0; extra == "dev"
29
+ Requires-Dist: black>=25.1.0; extra == "dev"
30
+ Requires-Dist: flake8>=7.1.2; extra == "dev"
31
+ Requires-Dist: pep8-naming>=0.14.1; extra == "dev"
41
32
  Dynamic: license-file
42
33
 
43
34
  # mgnify-pipelines-toolkit
@@ -74,8 +65,9 @@ Before starting any development, you should do these few steps:
74
65
  - Clone the repo if you haven't already and create a feature branch from the `dev` branch (NOT `main`).
75
66
  - Create a virtual environment with the tool of your choice (e.g. `conda create --name my_new_env`)
76
67
  - Activate your new environment (e.g. `conda activate my_new_env`)
77
- - Install dev dependencies `pip install -e '.[dev]'`
68
+ - Install dev dependencies `pip install -e '.[tests,dev]'`
78
69
  - Install pre-commit hooks `pre-commit install`
70
+ - Run unit tests `pytest`
79
71
 
80
72
  Following the steps above ensures that the code you add will be linted and formatted properly.
81
73
 
@@ -12,7 +12,7 @@ mgnify_pipelines_toolkit/analysis/amplicon/primer_val_classification.py,sha256=B
12
12
  mgnify_pipelines_toolkit/analysis/amplicon/remove_ambiguous_reads.py,sha256=Wu4tRtuRkgd3hoeuwPl_E5ghxIW7e_1vrcvFGWv_U4A,3173
13
13
  mgnify_pipelines_toolkit/analysis/amplicon/rev_comp_se_primers.py,sha256=yLpzkRJXAeXRUNgz60zopEwHcdprM2UDjquE-GkrFys,1722
14
14
  mgnify_pipelines_toolkit/analysis/amplicon/standard_primer_matching.py,sha256=K6gniytuItq5WzHLi1BsaUCOdP4Zm0_ZzW2_ns7-BTI,11114
15
- mgnify_pipelines_toolkit/analysis/assembly/add_rhea_chebi_annotation.py,sha256=HarDM6ay0MbyDfGGjmxP8epjsXciAJHOmqe8G64gLuM,4258
15
+ mgnify_pipelines_toolkit/analysis/assembly/add_rhea_chebi_annotation.py,sha256=NZSNY2bqs_TQyz8riDqiEFPLKcwTgzh1C7DeVHT6V8Q,4366
16
16
  mgnify_pipelines_toolkit/analysis/assembly/antismash_gff_builder.py,sha256=wXrw1B-z4hOu5oA27Vp1WYxGP2Mk6ZY4i_T5jDZgek0,6954
17
17
  mgnify_pipelines_toolkit/analysis/assembly/combined_gene_caller_merge.py,sha256=Pq-9RSt3RCxzDMQVW1VHlHF4NtpVwCWFbg2CMkvpZZc,19089
18
18
  mgnify_pipelines_toolkit/analysis/assembly/generate_gaf.py,sha256=2T4T7aXMGPac-LZUXJF3lOUzZZF50dAKkKTSaO-4idQ,3587
@@ -20,14 +20,14 @@ mgnify_pipelines_toolkit/analysis/assembly/gff_annotation_utils.py,sha256=IlkeP4
20
20
  mgnify_pipelines_toolkit/analysis/assembly/gff_file_utils.py,sha256=8kv_6KWznOVRkeAtghLf4pxKPhAqdn36LOK4MsTz9hU,3282
21
21
  mgnify_pipelines_toolkit/analysis/assembly/gff_toolkit.py,sha256=uUIo97gmzO2zzN-pYF5paIzeHWBsmmjFp7zGAhf4PKY,5021
22
22
  mgnify_pipelines_toolkit/analysis/assembly/go_utils.py,sha256=eay9e3Xdc8XxnlC_4SHHjN89k-M9i_cFMc2lI_ZFxqY,5596
23
- mgnify_pipelines_toolkit/analysis/assembly/krona_txt_from_cat_classification.py,sha256=1nAgfjb1pT80Q82FZ2hSkWCGVD6BoKVP4MIqsFTmQ1c,4024
23
+ mgnify_pipelines_toolkit/analysis/assembly/krona_txt_from_cat_classification.py,sha256=uex2T6GagtYFBIc39-Xm4SFHL06KAQ5v0_loOmY_eaw,4289
24
24
  mgnify_pipelines_toolkit/analysis/assembly/summarise_goslims.py,sha256=TPaKlYkoy37_XgYNOskWCCoXtPNku_k5ygSeK4fT1VQ,6689
25
25
  mgnify_pipelines_toolkit/analysis/genomes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
26
26
  mgnify_pipelines_toolkit/analysis/shared/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
27
27
  mgnify_pipelines_toolkit/analysis/shared/convert_cmscan_to_cmsearch_tblout.py,sha256=0Ot1j4LPsEPyPbySSAh6n9s5Dilm_8_M9YQvTnQ-1PQ,4415
28
28
  mgnify_pipelines_toolkit/analysis/shared/dwc_summary_generator.py,sha256=hggPqv9QawWAccm5tmru4VF9VnQAHF5LCXnqyLw_BWI,6727
29
29
  mgnify_pipelines_toolkit/analysis/shared/fastq_suffix_header_check.py,sha256=ye0Jka6_lNn4dQGb2QG3YT46y7QK0QvyaIitIaS8JVQ,4026
30
- mgnify_pipelines_toolkit/analysis/shared/get_subunits.py,sha256=j_UN3hItF7KhJrhGrSqjvZMg-ZwKAMc2sc0vHdJzjQw,4908
30
+ mgnify_pipelines_toolkit/analysis/shared/get_subunits.py,sha256=UrU0CpZj3pfHZWI7Uuhv2a_C0JsO8pnVErY0sWGgNdw,4920
31
31
  mgnify_pipelines_toolkit/analysis/shared/get_subunits_coords.py,sha256=EH5RyzesLqsonnTQbSDs7kAOV6IskS4oyqZYlex1tAY,1934
32
32
  mgnify_pipelines_toolkit/analysis/shared/library_strategy_check.py,sha256=6Ck2NhwRWw66GctUtKDdPT5fwJhWFR_YOZq-Vxwoa8A,1996
33
33
  mgnify_pipelines_toolkit/analysis/shared/mapseq2biom.py,sha256=7-U0DN1joVu0ifLOoDUK2Pfqy8rb1RDKT6khVg3jky0,5559
@@ -44,9 +44,9 @@ mgnify_pipelines_toolkit/schemas/schemas.py,sha256=pnH8LUH8i2ACNvFNWyG-n-eIHZcI5
44
44
  mgnify_pipelines_toolkit/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
45
45
  mgnify_pipelines_toolkit/utils/fasta_to_delimited.py,sha256=lgYIR1S4crURY7C7nFtgE6QMV4u4zCNsUrVkcRnsEEo,3996
46
46
  mgnify_pipelines_toolkit/utils/get_mpt_version.py,sha256=aS9bWrC9CP7tpxoEVg6eEYt18-pmjG7fJl5Mchz4YOU,798
47
- mgnify_pipelines_toolkit-1.0.3.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
48
- mgnify_pipelines_toolkit-1.0.3.dist-info/METADATA,sha256=9_2fpTUZw_ers9JvGS41z9rnvasCWl_Dnh4to-pfSV0,6203
49
- mgnify_pipelines_toolkit-1.0.3.dist-info/WHEEL,sha256=DK49LOLCYiurdXXOXwGJm6U4DkHkg4lcxjhqwRa0CP4,91
50
- mgnify_pipelines_toolkit-1.0.3.dist-info/entry_points.txt,sha256=QZ6vY4w3lYG8Xmll_s9SIsOpkxa5gBVEIxU3GvoCF4I,2946
51
- mgnify_pipelines_toolkit-1.0.3.dist-info/top_level.txt,sha256=xA_wC7C01V3VwuDnqwRM2QYeJJ45WtvF6LVav4tYxuE,25
52
- mgnify_pipelines_toolkit-1.0.3.dist-info/RECORD,,
47
+ mgnify_pipelines_toolkit-1.0.4.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
48
+ mgnify_pipelines_toolkit-1.0.4.dist-info/METADATA,sha256=Coky89dC0Xh5wHLk7fPGEOk_-fXY3GvvMMtb2dz5krc,5810
49
+ mgnify_pipelines_toolkit-1.0.4.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
50
+ mgnify_pipelines_toolkit-1.0.4.dist-info/entry_points.txt,sha256=QZ6vY4w3lYG8Xmll_s9SIsOpkxa5gBVEIxU3GvoCF4I,2946
51
+ mgnify_pipelines_toolkit-1.0.4.dist-info/top_level.txt,sha256=xA_wC7C01V3VwuDnqwRM2QYeJJ45WtvF6LVav4tYxuE,25
52
+ mgnify_pipelines_toolkit-1.0.4.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (78.0.2)
2
+ Generator: setuptools (78.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5