PyPI - pyobo - Versions diffs - 0.10.12__py3-none-any.whl → 0.11.1__py3-none-any.whl - Mend

pyobo 0.10.12__py3-none-any.whl → 0.11.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (153) hide show

pyobo/__init__.py +0 -2
pyobo/__main__.py +0 -2
pyobo/api/__init__.py +0 -2
pyobo/api/alts.py +6 -7
pyobo/api/hierarchy.py +14 -15
pyobo/api/metadata.py +3 -4
pyobo/api/names.py +31 -32
pyobo/api/properties.py +6 -7
pyobo/api/relations.py +12 -11
pyobo/api/species.py +5 -6
pyobo/api/typedefs.py +1 -3
pyobo/api/utils.py +61 -5
pyobo/api/xrefs.py +4 -5
pyobo/aws.py +3 -5
pyobo/cli/__init__.py +0 -2
pyobo/cli/aws.py +0 -2
pyobo/cli/cli.py +0 -4
pyobo/cli/database.py +1 -3
pyobo/cli/lookup.py +0 -2
pyobo/cli/utils.py +0 -2
pyobo/constants.py +1 -33
pyobo/getters.py +19 -26
pyobo/gilda_utils.py +19 -17
pyobo/identifier_utils.py +10 -10
pyobo/mocks.py +5 -6
pyobo/normalizer.py +24 -24
pyobo/obographs.py +8 -5
pyobo/plugins.py +3 -4
pyobo/py.typed +0 -0
pyobo/reader.py +19 -21
pyobo/registries/__init__.py +0 -2
pyobo/registries/metaregistry.py +6 -8
pyobo/resource_utils.py +1 -3
pyobo/resources/__init__.py +0 -2
pyobo/resources/ncbitaxon.py +2 -3
pyobo/resources/ro.py +2 -4
pyobo/resources/so.py +55 -0
pyobo/resources/so.tsv +2604 -0
pyobo/sources/README.md +15 -0
pyobo/sources/__init__.py +0 -2
pyobo/sources/agrovoc.py +3 -3
pyobo/sources/antibodyregistry.py +2 -3
pyobo/sources/biogrid.py +4 -4
pyobo/sources/ccle.py +3 -4
pyobo/sources/cgnc.py +1 -3
pyobo/sources/chebi.py +2 -4
pyobo/sources/chembl.py +1 -3
pyobo/sources/civic_gene.py +2 -3
pyobo/sources/complexportal.py +57 -20
pyobo/sources/conso.py +2 -4
pyobo/sources/cpt.py +1 -3
pyobo/sources/credit.py +1 -1
pyobo/sources/cvx.py +1 -3
pyobo/sources/depmap.py +3 -4
pyobo/sources/dictybase_gene.py +15 -12
pyobo/sources/drugbank.py +6 -7
pyobo/sources/drugbank_salt.py +3 -4
pyobo/sources/drugcentral.py +9 -8
pyobo/sources/expasy.py +33 -16
pyobo/sources/famplex.py +3 -5
pyobo/sources/flybase.py +5 -6
pyobo/sources/geonames.py +1 -1
pyobo/sources/gmt_utils.py +5 -6
pyobo/sources/go.py +4 -6
pyobo/sources/gwascentral_phenotype.py +1 -3
pyobo/sources/gwascentral_study.py +2 -3
pyobo/sources/hgnc.py +30 -26
pyobo/sources/hgncgenefamily.py +9 -11
pyobo/sources/icd10.py +3 -4
pyobo/sources/icd11.py +3 -4
pyobo/sources/icd_utils.py +6 -7
pyobo/sources/interpro.py +3 -5
pyobo/sources/itis.py +1 -3
pyobo/sources/kegg/__init__.py +0 -2
pyobo/sources/kegg/api.py +3 -4
pyobo/sources/kegg/genes.py +3 -4
pyobo/sources/kegg/genome.py +19 -9
pyobo/sources/kegg/pathway.py +5 -6
pyobo/sources/mesh.py +19 -21
pyobo/sources/mgi.py +1 -3
pyobo/sources/mirbase.py +13 -9
pyobo/sources/mirbase_constants.py +0 -2
pyobo/sources/mirbase_family.py +1 -3
pyobo/sources/mirbase_mature.py +1 -3
pyobo/sources/msigdb.py +4 -5
pyobo/sources/ncbigene.py +3 -5
pyobo/sources/npass.py +2 -4
pyobo/sources/omim_ps.py +1 -3
pyobo/sources/pathbank.py +35 -28
pyobo/sources/pfam.py +1 -3
pyobo/sources/pfam_clan.py +1 -3
pyobo/sources/pid.py +3 -5
pyobo/sources/pombase.py +7 -6
pyobo/sources/pubchem.py +2 -3
pyobo/sources/reactome.py +30 -11
pyobo/sources/rgd.py +3 -4
pyobo/sources/rhea.py +7 -8
pyobo/sources/ror.py +3 -2
pyobo/sources/selventa/__init__.py +0 -2
pyobo/sources/selventa/schem.py +1 -3
pyobo/sources/selventa/scomp.py +1 -3
pyobo/sources/selventa/sdis.py +1 -3
pyobo/sources/selventa/sfam.py +1 -3
pyobo/sources/sgd.py +1 -3
pyobo/sources/slm.py +29 -17
pyobo/sources/umls/__init__.py +0 -2
pyobo/sources/umls/__main__.py +0 -2
pyobo/sources/umls/get_synonym_types.py +1 -1
pyobo/sources/umls/umls.py +2 -4
pyobo/sources/uniprot/__init__.py +0 -2
pyobo/sources/uniprot/uniprot.py +11 -10
pyobo/sources/uniprot/uniprot_ptm.py +6 -5
pyobo/sources/utils.py +3 -5
pyobo/sources/wikipathways.py +1 -3
pyobo/sources/zfin.py +20 -9
pyobo/ssg/__init__.py +3 -2
pyobo/struct/__init__.py +0 -2
pyobo/struct/reference.py +22 -23
pyobo/struct/struct.py +132 -116
pyobo/struct/typedef.py +14 -10
pyobo/struct/utils.py +0 -2
pyobo/utils/__init__.py +0 -2
pyobo/utils/cache.py +14 -6
pyobo/utils/io.py +9 -10
pyobo/utils/iter.py +5 -6
pyobo/utils/misc.py +1 -3
pyobo/utils/ndex_utils.py +6 -7
pyobo/utils/path.py +4 -5
pyobo/version.py +3 -5
pyobo/xrefdb/__init__.py +0 -2
pyobo/xrefdb/canonicalizer.py +27 -18
pyobo/xrefdb/priority.py +0 -2
pyobo/xrefdb/sources/__init__.py +3 -4
pyobo/xrefdb/sources/biomappings.py +0 -2
pyobo/xrefdb/sources/cbms2019.py +0 -2
pyobo/xrefdb/sources/chembl.py +0 -2
pyobo/xrefdb/sources/compath.py +1 -3
pyobo/xrefdb/sources/famplex.py +3 -5
pyobo/xrefdb/sources/gilda.py +0 -2
pyobo/xrefdb/sources/intact.py +5 -5
pyobo/xrefdb/sources/ncit.py +1 -3
pyobo/xrefdb/sources/pubchem.py +2 -5
pyobo/xrefdb/sources/wikidata.py +2 -4
pyobo/xrefdb/xrefs_pipeline.py +15 -16
{pyobo-0.10.12.dist-info → pyobo-0.11.1.dist-info}/LICENSE +1 -1
pyobo-0.11.1.dist-info/METADATA +711 -0
pyobo-0.11.1.dist-info/RECORD +173 -0
{pyobo-0.10.12.dist-info → pyobo-0.11.1.dist-info}/WHEEL +1 -1
pyobo-0.11.1.dist-info/entry_points.txt +2 -0
pyobo-0.10.12.dist-info/METADATA +0 -499
pyobo-0.10.12.dist-info/RECORD +0 -169
pyobo-0.10.12.dist-info/entry_points.txt +0 -15
{pyobo-0.10.12.dist-info → pyobo-0.11.1.dist-info}/top_level.txt +0 -0

pyobo/sources/famplex.py CHANGED Viewed

@@ -1,10 +1,8 @@
-# -*- coding: utf-8 -*-
 """Converter for FamPlex."""
 import logging
 from collections import defaultdict
-from typing import Iterable, List, Mapping, Tuple
+from collections.abc import Iterable, Mapping
 import bioregistry
 from pystow.utils import get_commit
@@ -62,7 +60,7 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]:
         dtype=str,
         force=force,
     )
-    id_to_definition: Mapping[str, Tuple[str, str]] = {
+    id_to_definition: Mapping[str, tuple[str, str]] = {
         identifier: (definition, provenance)
         for identifier, provenance, definition in definitions_df.values
     }
@@ -140,7 +138,7 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]:
         yield term
-def _get_xref_df(version: str) -> Mapping[str, List[Reference]]:
+def _get_xref_df(version: str) -> Mapping[str, list[Reference]]:
     base_url = f"https://raw.githubusercontent.com/sorgerlab/famplex/{version}"
     xrefs_url = f"{base_url}/equivalences.csv"
     xrefs_df = ensure_df(PREFIX, url=xrefs_url, version=version, header=None, sep=",", dtype=str)

pyobo/sources/flybase.py CHANGED Viewed

@@ -1,14 +1,13 @@
-# -*- coding: utf-8 -*-
 """Converter for FlyBase Genes."""
 import logging
-from typing import Iterable, Mapping, Set
+from collections.abc import Iterable, Mapping
 import pandas as pd
 from tqdm.auto import tqdm
 from pyobo import Reference
+from pyobo.resources.so import get_so_name
 from pyobo.struct import Obo, Term, from_species, orthologous
 from pyobo.utils.io import multisetdict
 from pyobo.utils.path import ensure_df
@@ -68,7 +67,7 @@ def _get_definitions(version: str, force: bool = False) -> Mapping[str, str]:
     return dict(df.values)
-def _get_human_orthologs(version: str, force: bool = False) -> Mapping[str, Set[str]]:
+def _get_human_orthologs(version: str, force: bool = False) -> Mapping[str, set[str]]:
     url = (
         f"http://ftp.flybase.net/releases/FB{version}/precomputed_files/"
         f"orthologs/dmel_human_orthologs_disease_fb_{version}.tsv.gz"
@@ -135,7 +134,7 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]:
                 "FlyBase gene type is missing mapping to Sequence Ontology (SO): %s", gtype
             )
         else:
-            so[gtype] = Reference.auto("SO", so_id)
+            so[gtype] = Reference(prefix="SO", identifier=so_id, name=get_so_name(so_id))
     for _, reference in sorted(so.items()):
         yield Term(reference=reference)
@@ -155,7 +154,7 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]:
         for hgnc_curie in human_orthologs.get(identifier, []):
             if not hgnc_curie or pd.isna(hgnc_curie):
                 continue
-            hgnc_ortholog = Reference.from_curie(hgnc_curie, auto=True)
+            hgnc_ortholog = Reference.from_curie(hgnc_curie)
             if hgnc_ortholog is None:
                 tqdm.write(f"[{PREFIX}] {identifier} had invalid ortholog: {hgnc_curie}")
             else:

pyobo/sources/geonames.py CHANGED Viewed

@@ -3,7 +3,7 @@
 from __future__ import annotations
 import logging
-from typing import Collection, Iterable, Mapping
+from collections.abc import Collection, Iterable, Mapping
 import pandas as pd
 from pystow.utils import read_zipfile_csv

pyobo/sources/gmt_utils.py CHANGED Viewed

@@ -1,12 +1,11 @@
-# -*- coding: utf-8 -*-
 """GMT utilities."""
+from collections.abc import Iterable
 from pathlib import Path
-from typing import Iterable, Set, Tuple, Union
+from typing import Union
-GMTSummary = Tuple[str, str, Set[str]]
-WikiPathwaysGMTSummary = Tuple[str, str, str, str, str, Set[str]]
+GMTSummary = tuple[str, str, set[str]]
+WikiPathwaysGMTSummary = tuple[str, str, str, str, str, set[str]]
 def parse_gmt_file(path: Union[str, Path]) -> Iterable[GMTSummary]:
@@ -20,7 +19,7 @@ def parse_gmt_file(path: Union[str, Path]) -> Iterable[GMTSummary]:
             yield _process_line(line)
-def _process_line(line: str) -> Tuple[str, str, Set[str]]:
+def _process_line(line: str) -> tuple[str, str, set[str]]:
     """Return the pathway name, url, and gene sets associated.
     :param line: gmt file line

pyobo/sources/go.py CHANGED Viewed

@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
 """Gene Ontology."""
 from pyobo import get_descendants
@@ -14,13 +12,13 @@ __all__ = [
 def is_biological_process(identifier: str) -> bool:
     """Return if the given GO identifier is a biological process.
-    >>> is_biological_process('0006915')
+    >>> is_biological_process("0006915")
     True
-    >>> is_biological_process('GO:0006915')
+    >>> is_biological_process("GO:0006915")
     True
-    >>> is_molecular_function('0006915')
+    >>> is_molecular_function("0006915")
     False
-    >>> is_cellular_component('0006915')
+    >>> is_cellular_component("0006915")
     False
     """
     return _is_descendant(identifier, "0008150")

pyobo/sources/gwascentral_phenotype.py CHANGED Viewed

@@ -1,9 +1,7 @@
-# -*- coding: utf-8 -*-
 """Converter for GWAS Central Phenotypes."""
 import json
-from typing import Iterable
+from collections.abc import Iterable
 from tqdm.auto import tqdm, trange

pyobo/sources/gwascentral_study.py CHANGED Viewed

@@ -1,10 +1,9 @@
-# -*- coding: utf-8 -*-
 """Converter for GWAS Central."""
 import logging
 import tarfile
-from typing import Iterable, Optional
+from collections.abc import Iterable
+from typing import Optional
 from xml.etree import ElementTree
 from pyobo.struct import Obo, Reference, Term, has_part

pyobo/sources/hgnc.py CHANGED Viewed

@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
 """Converter for HGNC."""
 import itertools as itt
@@ -7,13 +5,15 @@ import json
 import logging
 import typing
 from collections import Counter, defaultdict
+from collections.abc import Iterable
 from operator import attrgetter
-from typing import DefaultDict, Dict, Iterable, Optional
+from typing import Optional
 from tabulate import tabulate
 from tqdm.auto import tqdm
 from pyobo.api.utils import get_version
+from pyobo.resources.so import get_so_name
 from pyobo.struct import (
     Obo,
     Reference,
@@ -38,8 +38,8 @@ logger = logging.getLogger(__name__)
 PREFIX = "hgnc"
 DEFINITIONS_URL_FMT = (
-    "http://ftp.ebi.ac.uk/pub/databases/genenames/hgnc/"
-    "archive/monthly/json/hgnc_complete_set_{version}.json"
+    "https://storage.googleapis.com/public-download-files/hgnc/archive/archive/monthly/json/"
+    "hgnc_complete_set_{version}.json"
 )
 previous_symbol_type = SynonymTypeDef.from_text("previous_symbol")
@@ -223,7 +223,7 @@ class HGNCGetter(Obo):
         alias_symbol_type,
     ]
     root_terms = [
-        Reference(prefix="so", identifier=so_id)
+        Reference(prefix="SO", identifier=so_id, name=get_so_name(so_id))
         for so_id in sorted(set(LOCUS_TYPE_TO_SO.values()))
         if so_id
     ]
@@ -238,12 +238,12 @@ def get_obo(*, force: bool = False) -> Obo:
     return HGNCGetter(force=force)
-def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Term]:  # noqa:C901
+def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Term]:
     """Get HGNC terms."""
     if version is None:
         version = get_version("hgnc")
     unhandled_entry_keys: typing.Counter[str] = Counter()
-    unhandle_locus_types: DefaultDict[str, Dict[str, Term]] = defaultdict(dict)
+    unhandle_locus_types: defaultdict[str, dict[str, Term]] = defaultdict(dict)
     path = ensure_path(
         PREFIX,
         url=DEFINITIONS_URL_FMT.format(version=version),
@@ -257,7 +257,7 @@ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Te
     yield Term.from_triple("NCBITaxon", "9606", "Homo sapiens")
     yield from sorted(
         {
-            Term(reference=Reference.auto("SO", so_id))
+            Term(reference=Reference(prefix="SO", identifier=so_id, name=get_so_name(so_id)))
             for so_id in sorted(LOCUS_TYPE_TO_SO.values())
             if so_id
         },
@@ -364,23 +364,25 @@ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Te
             xref_identifiers = entry.pop(key, None)
             if xref_identifiers is None:
                 continue
             if isinstance(xref_identifiers, (str, int)):
+                xref_identifiers = [str(xref_identifiers)]
+            if xref_prefix == "merops.entry":
+                continue
+                # e.g., XM02-001 should be rewritten as XM02.001
+                xref_identifiers = [i.replace("-", ".") for i in xref_identifiers]
+            if xref_prefix == "refseq":
+                # e.g., strip off dots without substantiated record versions like in NM_021728.
+                xref_identifiers = [i.strip(".") for i in xref_identifiers]
+            if len(xref_identifiers) == 1:
                 term.append_exact_match(
-                    Reference(prefix=xref_prefix, identifier=str(xref_identifiers))
+                    Reference(prefix=xref_prefix, identifier=str(xref_identifiers[0]))
                 )
-            elif isinstance(xref_identifiers, list):
-                if len(xref_identifiers) == 1:
-                    term.append_exact_match(
-                        Reference(prefix=xref_prefix, identifier=str(xref_identifiers[0]))
-                    )
-                else:
-                    for xref_identifier in xref_identifiers:
-                        term.append_xref(
-                            Reference(prefix=xref_prefix, identifier=str(xref_identifier))
-                        )
             else:
-                raise TypeError
+                for xref_identifier in xref_identifiers:
+                    term.append_xref(Reference(prefix=xref_prefix, identifier=str(xref_identifier)))
         for pubmed_id in entry.pop("pubmed_id", []):
             term.append_provenance(Reference(prefix="pubmed", identifier=str(pubmed_id)))
@@ -417,9 +419,11 @@ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Te
         locus_group = entry.pop("locus_group")
         so_id = LOCUS_TYPE_TO_SO.get(locus_type)
         if so_id:
-            term.append_parent(Reference.auto("SO", so_id))
+            term.append_parent(Reference(prefix="SO", identifier=so_id, name=get_so_name(so_id)))
         else:
-            term.append_parent(Reference.auto("SO", "0000704"))  # gene
+            term.append_parent(
+                Reference(prefix="SO", identifier="0000704", name=get_so_name("0000704"))
+            )  # gene
             unhandle_locus_types[locus_type][identifier] = term
             term.append_property("locus_type", locus_type)
             term.append_property("locus_group", locus_group)
@@ -459,8 +463,8 @@ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Te
                 headers=["hgnc_id", "name", "obsolete", "link", "provenance"],
                 tablefmt="github",
             )
-            print(f"## {k} ({len(v)})", file=file)  # noqa: T201
-            print(t, "\n", file=file)  # noqa: T201
+            print(f"## {k} ({len(v)})", file=file)
+            print(t, "\n", file=file)
     unhandle_locus_type_counter = Counter(
         {locus_type: len(d) for locus_type, d in unhandle_locus_types.items()}

pyobo/sources/hgncgenefamily.py CHANGED Viewed

@@ -1,9 +1,7 @@
-# -*- coding: utf-8 -*-
 """Converter for HGNC Gene Families."""
 from collections import defaultdict
-from typing import Iterable, List, Mapping
+from collections.abc import Iterable, Mapping
 import pandas as pd
@@ -23,13 +21,13 @@ __all__ = [
 ]
 PREFIX = "hgnc.genegroup"
-FAMILIES_URL = "ftp://ftp.ebi.ac.uk/pub/databases/genenames/new/csv/genefamily_db_tables/family.csv"
+FAMILIES_URL = "https://storage.googleapis.com/public-download-files/hgnc/csv/csv/genefamily_db_tables/family.csv"
 # TODO use family_alias.csv
-HIERARCHY_URL = (
-    "ftp://ftp.ebi.ac.uk/pub/databases/genenames/new/csv/genefamily_db_tables/hierarchy.csv"
-)
+HIERARCHY_URL = "https://storage.googleapis.com/public-download-files/hgnc/csv/csv/genefamily_db_tables/hierarchy.csv"
-symbol_type = SynonymTypeDef.from_text("symbol")
+symbol_type = SynonymTypeDef(
+    reference=Reference(prefix="OMO", identifier="0004000", name="has symbol")
+)
 class HGNCGroupGetter(Obo):
@@ -50,7 +48,7 @@ def get_obo(force: bool = False) -> Obo:
     return HGNCGroupGetter(force=force)
-def get_hierarchy(force: bool = False) -> Mapping[str, List[str]]:
+def get_hierarchy(force: bool = False) -> Mapping[str, list[str]]:
     """Get the HGNC Gene Families hierarchy as a dictionary."""
     path = ensure_path(PREFIX, url=HIERARCHY_URL, force=force)
     df = pd.read_csv(path, dtype={"parent_fam_id": str, "child_fam_id": str})
@@ -80,7 +78,7 @@ def get_terms(force: bool = False) -> Iterable[Term]:
                     name=parent.name,
                 )
             )
-    gene_group = Reference.auto("SO", "0005855")
+    gene_group = Reference(prefix="SO", identifier="0005855", name="gene group")
     yield Term(reference=gene_group)
     for term in terms:
         if not term.parents:
@@ -100,7 +98,7 @@ def _get_terms_helper(force: bool = False) -> Iterable[Term]:
             definition=definition,
         )
         if pubmed_ids and pd.notna(pubmed_ids):
-            for s in pubmed_ids.split(","):
+            for s in pubmed_ids.replace(" ", ",").split(","):
                 term.append_provenance(Reference(prefix="pubmed", identifier=s.strip()))
         if desc_go and pd.notna(desc_go):
             go_id = desc_go[len("http://purl.uniprot.org/go/") :]

pyobo/sources/icd10.py CHANGED Viewed

@@ -1,12 +1,11 @@
-# -*- coding: utf-8 -*-
 """Convert ICD-10 to OBO.
 Run with python -m pyobo.sources.icd10 -v
 """
 import logging
-from typing import Any, Iterable, Mapping, Set
+from collections.abc import Iterable, Mapping
+from typing import Any
 import click
 from more_click import verbose_option
@@ -57,7 +56,7 @@ def iter_terms() -> Iterable[Term]:
     chapter_urls = res_json["child"]
     tqdm.write(f"there are {len(chapter_urls)} chapters")
-    visited_identifiers: Set[str] = set()
+    visited_identifiers: set[str] = set()
     for identifier in get_child_identifiers(ICD10_TOP_LEVEL_URL, res_json):
         yield from visiter(
             identifier,

pyobo/sources/icd11.py CHANGED Viewed

@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
 """Convert ICD11 to OBO.
 Run with python -m pyobo.sources.icd11 -v
@@ -8,7 +6,8 @@ Run with python -m pyobo.sources.icd11 -v
 import json
 import logging
 import os
-from typing import Any, Iterable, Mapping, Set
+from collections.abc import Iterable, Mapping
+from typing import Any
 import click
 from more_click import verbose_option
@@ -67,7 +66,7 @@ def iterate_icd11() -> Iterable[Term]:
     tqdm.write(f'There are {len(res_json["child"])} top level entities')
-    visited_identifiers: Set[str] = set()
+    visited_identifiers: set[str] = set()
     for identifier in get_child_identifiers(ICD11_TOP_LEVEL_URL, res_json):
         yield from visiter(
             identifier,

pyobo/sources/icd_utils.py CHANGED Viewed

@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
 """Utilities or interacting with the ICD API.
 Want to get your own API cliend ID and client secret?
@@ -11,8 +9,9 @@ Want to get your own API cliend ID and client secret?
 import datetime
 import json
 import os
+from collections.abc import Iterable, Mapping
 from pathlib import Path
-from typing import Any, Callable, Iterable, List, Mapping, Set, Union
+from typing import Any, Callable, Union
 import pystow
 import requests
@@ -20,7 +19,7 @@ from cachier import cachier
 from pystow.config_api import ConfigError
 from tqdm.auto import tqdm
-from ..getters import NoBuild
+from ..getters import NoBuildError
 from ..struct import Term
 TOKEN_URL = "https://icdaccessmanagement.who.int/connect/token"  # noqa:S105
@@ -43,7 +42,7 @@ def _get_entity(endpoint: str, identifier: str):
     return res.json()
-def get_child_identifiers(endpoint: str, res_json: Mapping[str, Any]) -> List[str]:
+def get_child_identifiers(endpoint: str, res_json: Mapping[str, Any]) -> list[str]:
     """Ge the child identifiers."""
     return [url[len(endpoint) :].lstrip("/") for url in res_json.get("child", [])]
@@ -55,7 +54,7 @@ def get_icd_api_headers() -> Mapping[str, str]:
         icd_client_id = pystow.get_config("pyobo", "icd_client_id", raise_on_missing=True)
         icd_client_secret = pystow.get_config("pyobo", "icd_client_secret", raise_on_missing=True)
     except ConfigError as e:
-        raise NoBuild from e
+        raise NoBuildError from e
     grant_type = "client_credentials"
     body_params = {"grant_type": grant_type}
@@ -73,7 +72,7 @@ def get_icd_api_headers() -> Mapping[str, str]:
 def visiter(
     identifier: str,
-    visited_identifiers: Set[str],
+    visited_identifiers: set[str],
     directory: Union[str, Path],
     *,
     endpoint: str,

pyobo/sources/interpro.py CHANGED Viewed

@@ -1,9 +1,7 @@
-# -*- coding: utf-8 -*-
 """Converter for InterPro."""
 from collections import defaultdict
-from typing import DefaultDict, Iterable, List, Mapping, Set, Tuple
+from collections.abc import Iterable, Mapping
 from .utils import get_go_mapping
 from ..struct import Obo, Reference, Term
@@ -82,7 +80,7 @@ def iter_terms(*, version: str, proteins: bool = False, force: bool = False) ->
         yield term
-def get_interpro_go_df(version: str, force: bool = False) -> Mapping[str, Set[Tuple[str, str]]]:
+def get_interpro_go_df(version: str, force: bool = False) -> Mapping[str, set[tuple[str, str]]]:
     """Get InterPro to Gene Ontology molecular function mapping."""
     url = f"https://ftp.ebi.ac.uk/pub/databases/interpro/releases/{version}/interpro2go"
     path = ensure_path(PREFIX, url=url, name="interpro2go.tsv", version=version, force=force)
@@ -98,7 +96,7 @@ def get_interpro_tree(version: str, force: bool = False):
 def _parse_tree_helper(lines: Iterable[str]):
-    rv1: DefaultDict[str, List[str]] = defaultdict(list)
+    rv1: defaultdict[str, list[str]] = defaultdict(list)
     previous_depth, previous_id = 0, ""
     stack = [previous_id]

pyobo/sources/itis.py CHANGED Viewed

@@ -1,13 +1,11 @@
-# -*- coding: utf-8 -*-
 """Converter for the Integrated Taxonomic Information System (ITIS)."""
 import os
 import shutil
 import sqlite3
 import zipfile
+from collections.abc import Iterable
 from contextlib import closing
-from typing import Iterable
 from pyobo.struct import Obo, Reference, Term
 from pyobo.utils.io import multidict

pyobo/sources/kegg/__init__.py CHANGED Viewed

@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
 """KEGG Databases."""
 from .genes import KEGGGeneGetter

pyobo/sources/kegg/api.py CHANGED Viewed

@@ -1,10 +1,9 @@
-# -*- coding: utf-8 -*-
 """API utilities for KEGG."""
 import urllib.error
+from collections.abc import Mapping
 from dataclasses import dataclass
-from typing import Mapping, Optional
+from typing import Optional
 from pyobo import Reference, Term, ensure_path
 from pyobo.struct import from_species
@@ -132,7 +131,7 @@ def _ensure_conv_genome_helper(
             version=version,
         )
         with path_rv.open("w") as file:
-            print(file=file)  # noqa: T201
+            print(file=file)
         return path_rv.as_posix()
     except FileNotFoundError:
         return None

pyobo/sources/kegg/genes.py CHANGED Viewed

@@ -1,12 +1,11 @@
-# -*- coding: utf-8 -*-
 """Convert KEGG Genes to OBO.
 Run with ``python -m pyobo.sources.kegg.genes``
 """
 import logging
-from typing import Iterable, Optional
+from collections.abc import Iterable
+from typing import Optional
 import click
 from more_click import verbose_option
@@ -90,7 +89,7 @@ def _make_terms(
                 )
                 continue
             if ";" in line:
-                *_extras, name = [part.strip() for part in extras.split(";")]
+                *_extras, name = (part.strip() for part in extras.split(";"))
             else:
                 name = extras

pyobo/sources/kegg/genome.py CHANGED Viewed

@@ -1,12 +1,12 @@
-# -*- coding: utf-8 -*-
 """Convert KEGG Genome to OBO.
 Run with ``python -m pyobo.sources.kegg.genome``
 """
+from __future__ import annotations
 import logging
-from typing import Iterable
+from collections.abc import Iterable
 from tqdm.auto import tqdm
@@ -48,8 +48,11 @@ def get_obo() -> Obo:
     return KEGGGenomeGetter()
-def parse_genome_line(line: str) -> KEGGGenome:
+def parse_genome_line(line: str) -> KEGGGenome | None:
     """Parse a line from the KEGG Genome database."""
+    if not line.startswith("T"):
+        #  This is for an NCBI Taxonomy
+        return None
     line = line.strip()
     identifier, rest = _s(line, "\t")
     identifier = identifier[len("gn:") :]
@@ -96,6 +99,8 @@ def iter_kegg_genomes(version: str, desc: str) -> Iterable[KEGGGenome]:
     it = tqdm(lines, desc=desc, unit_scale=True, unit="genome")
     for line in it:
         yv = parse_genome_line(line)
+        if yv is None:
+            continue
         it.set_postfix({"id": yv.identifier, "name": yv.name})
         yield yv
@@ -107,11 +112,16 @@ def iter_terms(version: str) -> Iterable[Term]:
     for kegg_genome in iter_kegg_genomes(version=version, desc="KEGG Genomes"):
         if kegg_genome.identifier in SKIP:
             continue
-        term = Term.from_triple(
-            prefix=KEGG_GENOME_PREFIX,
-            identifier=kegg_genome.identifier,
-            name=kegg_genome.name,
-        )
+        try:
+            reference = Reference(
+                prefix=KEGG_GENOME_PREFIX, identifier=kegg_genome.identifier, name=kegg_genome.name
+            )
+        except ValueError:
+            tqdm.write(f"[{KEGG_GENOME_PREFIX}] invalid identifier: {kegg_genome}")
+            continue
+        term = Term(reference=reference)
         if kegg_genome.taxonomy_id is not None:
             taxonomy_name = get_ncbitaxon_name(kegg_genome.taxonomy_id)
             if taxonomy_name is None:

pyobo/sources/kegg/pathway.py CHANGED Viewed

@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
 """Convert KEGG Pathways to OBO.
 Run with ``python -m pyobo.sources.kegg.pathway``
@@ -8,8 +6,9 @@ Run with ``python -m pyobo.sources.kegg.pathway``
 import logging
 import urllib.error
 from collections import defaultdict
+from collections.abc import Iterable, Mapping
 from functools import partial
-from typing import Iterable, List, Mapping, Tuple, Union
+from typing import Union
 from tqdm.auto import tqdm
 from tqdm.contrib.concurrent import thread_map
@@ -76,7 +75,7 @@ def iter_terms(version: str, skip_missing: bool = True) -> Iterable[Term]:
         )
-def _get_link_pathway_map(path: str) -> Mapping[str, List[str]]:
+def _get_link_pathway_map(path: str) -> Mapping[str, list[str]]:
     rv = defaultdict(list)
     with open(path) as file:
         for line in file:
@@ -110,7 +109,7 @@ def _iter_genome_terms(
         list_pathway_lines = [line.strip() for line in file]
     for line in list_pathway_lines:
         line = line.strip()
-        pathway_id, name = [part.strip() for part in line.split("\t")]
+        pathway_id, name = (part.strip() for part in line.split("\t"))
         pathway_id = pathway_id[len("path:") :]
         terms[pathway_id] = term = Term.from_triple(
@@ -149,7 +148,7 @@ def _iter_genome_terms(
 def iter_kegg_pathway_paths(
     version: str, skip_missing: bool = True
-) -> Iterable[Union[Tuple[KEGGGenome, str, str], Tuple[None, None, None]]]:
+) -> Iterable[Union[tuple[KEGGGenome, str, str], tuple[None, None, None]]]:
     """Get paths for the KEGG Pathway files."""
     genomes = list(iter_kegg_genomes(version=version, desc="KEGG Pathways"))
     func = partial(_process_genome, version=version, skip_missing=skip_missing)

pyobo 0.10.12__py3-none-any.whl → 0.11.1__py3-none-any.whl

pyobo 0.10.12__py3-none-any.whl → 0.11.1__py3-none-any.whl