PyPI - pyobo - Versions diffs - 0.10.12__py3-none-any.whl → 0.11.1__py3-none-any.whl - Mend

pyobo 0.10.12__py3-none-any.whl → 0.11.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (153) hide show

pyobo/__init__.py +0 -2
pyobo/__main__.py +0 -2
pyobo/api/__init__.py +0 -2
pyobo/api/alts.py +6 -7
pyobo/api/hierarchy.py +14 -15
pyobo/api/metadata.py +3 -4
pyobo/api/names.py +31 -32
pyobo/api/properties.py +6 -7
pyobo/api/relations.py +12 -11
pyobo/api/species.py +5 -6
pyobo/api/typedefs.py +1 -3
pyobo/api/utils.py +61 -5
pyobo/api/xrefs.py +4 -5
pyobo/aws.py +3 -5
pyobo/cli/__init__.py +0 -2
pyobo/cli/aws.py +0 -2
pyobo/cli/cli.py +0 -4
pyobo/cli/database.py +1 -3
pyobo/cli/lookup.py +0 -2
pyobo/cli/utils.py +0 -2
pyobo/constants.py +1 -33
pyobo/getters.py +19 -26
pyobo/gilda_utils.py +19 -17
pyobo/identifier_utils.py +10 -10
pyobo/mocks.py +5 -6
pyobo/normalizer.py +24 -24
pyobo/obographs.py +8 -5
pyobo/plugins.py +3 -4
pyobo/py.typed +0 -0
pyobo/reader.py +19 -21
pyobo/registries/__init__.py +0 -2
pyobo/registries/metaregistry.py +6 -8
pyobo/resource_utils.py +1 -3
pyobo/resources/__init__.py +0 -2
pyobo/resources/ncbitaxon.py +2 -3
pyobo/resources/ro.py +2 -4
pyobo/resources/so.py +55 -0
pyobo/resources/so.tsv +2604 -0
pyobo/sources/README.md +15 -0
pyobo/sources/__init__.py +0 -2
pyobo/sources/agrovoc.py +3 -3
pyobo/sources/antibodyregistry.py +2 -3
pyobo/sources/biogrid.py +4 -4
pyobo/sources/ccle.py +3 -4
pyobo/sources/cgnc.py +1 -3
pyobo/sources/chebi.py +2 -4
pyobo/sources/chembl.py +1 -3
pyobo/sources/civic_gene.py +2 -3
pyobo/sources/complexportal.py +57 -20
pyobo/sources/conso.py +2 -4
pyobo/sources/cpt.py +1 -3
pyobo/sources/credit.py +1 -1
pyobo/sources/cvx.py +1 -3
pyobo/sources/depmap.py +3 -4
pyobo/sources/dictybase_gene.py +15 -12
pyobo/sources/drugbank.py +6 -7
pyobo/sources/drugbank_salt.py +3 -4
pyobo/sources/drugcentral.py +9 -8
pyobo/sources/expasy.py +33 -16
pyobo/sources/famplex.py +3 -5
pyobo/sources/flybase.py +5 -6
pyobo/sources/geonames.py +1 -1
pyobo/sources/gmt_utils.py +5 -6
pyobo/sources/go.py +4 -6
pyobo/sources/gwascentral_phenotype.py +1 -3
pyobo/sources/gwascentral_study.py +2 -3
pyobo/sources/hgnc.py +30 -26
pyobo/sources/hgncgenefamily.py +9 -11
pyobo/sources/icd10.py +3 -4
pyobo/sources/icd11.py +3 -4
pyobo/sources/icd_utils.py +6 -7
pyobo/sources/interpro.py +3 -5
pyobo/sources/itis.py +1 -3
pyobo/sources/kegg/__init__.py +0 -2
pyobo/sources/kegg/api.py +3 -4
pyobo/sources/kegg/genes.py +3 -4
pyobo/sources/kegg/genome.py +19 -9
pyobo/sources/kegg/pathway.py +5 -6
pyobo/sources/mesh.py +19 -21
pyobo/sources/mgi.py +1 -3
pyobo/sources/mirbase.py +13 -9
pyobo/sources/mirbase_constants.py +0 -2
pyobo/sources/mirbase_family.py +1 -3
pyobo/sources/mirbase_mature.py +1 -3
pyobo/sources/msigdb.py +4 -5
pyobo/sources/ncbigene.py +3 -5
pyobo/sources/npass.py +2 -4
pyobo/sources/omim_ps.py +1 -3
pyobo/sources/pathbank.py +35 -28
pyobo/sources/pfam.py +1 -3
pyobo/sources/pfam_clan.py +1 -3
pyobo/sources/pid.py +3 -5
pyobo/sources/pombase.py +7 -6
pyobo/sources/pubchem.py +2 -3
pyobo/sources/reactome.py +30 -11
pyobo/sources/rgd.py +3 -4
pyobo/sources/rhea.py +7 -8
pyobo/sources/ror.py +3 -2
pyobo/sources/selventa/__init__.py +0 -2
pyobo/sources/selventa/schem.py +1 -3
pyobo/sources/selventa/scomp.py +1 -3
pyobo/sources/selventa/sdis.py +1 -3
pyobo/sources/selventa/sfam.py +1 -3
pyobo/sources/sgd.py +1 -3
pyobo/sources/slm.py +29 -17
pyobo/sources/umls/__init__.py +0 -2
pyobo/sources/umls/__main__.py +0 -2
pyobo/sources/umls/get_synonym_types.py +1 -1
pyobo/sources/umls/umls.py +2 -4
pyobo/sources/uniprot/__init__.py +0 -2
pyobo/sources/uniprot/uniprot.py +11 -10
pyobo/sources/uniprot/uniprot_ptm.py +6 -5
pyobo/sources/utils.py +3 -5
pyobo/sources/wikipathways.py +1 -3
pyobo/sources/zfin.py +20 -9
pyobo/ssg/__init__.py +3 -2
pyobo/struct/__init__.py +0 -2
pyobo/struct/reference.py +22 -23
pyobo/struct/struct.py +132 -116
pyobo/struct/typedef.py +14 -10
pyobo/struct/utils.py +0 -2
pyobo/utils/__init__.py +0 -2
pyobo/utils/cache.py +14 -6
pyobo/utils/io.py +9 -10
pyobo/utils/iter.py +5 -6
pyobo/utils/misc.py +1 -3
pyobo/utils/ndex_utils.py +6 -7
pyobo/utils/path.py +4 -5
pyobo/version.py +3 -5
pyobo/xrefdb/__init__.py +0 -2
pyobo/xrefdb/canonicalizer.py +27 -18
pyobo/xrefdb/priority.py +0 -2
pyobo/xrefdb/sources/__init__.py +3 -4
pyobo/xrefdb/sources/biomappings.py +0 -2
pyobo/xrefdb/sources/cbms2019.py +0 -2
pyobo/xrefdb/sources/chembl.py +0 -2
pyobo/xrefdb/sources/compath.py +1 -3
pyobo/xrefdb/sources/famplex.py +3 -5
pyobo/xrefdb/sources/gilda.py +0 -2
pyobo/xrefdb/sources/intact.py +5 -5
pyobo/xrefdb/sources/ncit.py +1 -3
pyobo/xrefdb/sources/pubchem.py +2 -5
pyobo/xrefdb/sources/wikidata.py +2 -4
pyobo/xrefdb/xrefs_pipeline.py +15 -16
{pyobo-0.10.12.dist-info → pyobo-0.11.1.dist-info}/LICENSE +1 -1
pyobo-0.11.1.dist-info/METADATA +711 -0
pyobo-0.11.1.dist-info/RECORD +173 -0
{pyobo-0.10.12.dist-info → pyobo-0.11.1.dist-info}/WHEEL +1 -1
pyobo-0.11.1.dist-info/entry_points.txt +2 -0
pyobo-0.10.12.dist-info/METADATA +0 -499
pyobo-0.10.12.dist-info/RECORD +0 -169
pyobo-0.10.12.dist-info/entry_points.txt +0 -15
{pyobo-0.10.12.dist-info → pyobo-0.11.1.dist-info}/top_level.txt +0 -0

pyobo/sources/mesh.py CHANGED Viewed

@@ -1,17 +1,16 @@
-# -*- coding: utf-8 -*-
 """Parser for the MeSH descriptors."""
 import datetime
 import itertools as itt
 import logging
 import re
-from typing import Any, Collection, Dict, Iterable, List, Mapping, Optional, Set, Tuple
+from collections.abc import Collection, Iterable, Mapping
+from typing import Any, Optional
 from xml.etree.ElementTree import Element
 from tqdm.auto import tqdm
-from pyobo.api.utils import get_version
+from pyobo.api.utils import safe_get_version
 from pyobo.identifier_utils import standardize_ec
 from pyobo.struct import Obo, Reference, Synonym, Term
 from pyobo.utils.cache import cached_json, cached_mapping
@@ -70,7 +69,7 @@ def get_tree_to_mesh_id(version: str) -> Mapping[str, str]:
 def get_terms(version: str, force: bool = False) -> Iterable[Term]:
     """Get MeSH OBO terms."""
-    mesh_id_to_term: Dict[str, Term] = {}
+    mesh_id_to_term: dict[str, Term] = {}
     descriptors = ensure_mesh_descriptors(version=version, force=force)
     supplemental_records = ensure_mesh_supplemental_records(version=version, force=force)
@@ -80,8 +79,8 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]:
         name = entry["name"]
         definition = entry.get("scope_note")
-        xrefs: List[Reference] = []
-        synonyms: Set[str] = set()
+        xrefs: list[Reference] = []
+        synonyms: set[str] = set()
         for concept in entry["concepts"]:
             synonyms.add(concept["name"])
             for term in concept["terms"]:
@@ -107,7 +106,7 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]:
 def ensure_mesh_descriptors(
     version: str, force: bool = False, force_process: bool = False
-) -> List[Mapping[str, Any]]:
+) -> list[Mapping[str, Any]]:
     """Get the parsed MeSH dictionary, and cache it if it wasn't already."""
     @cached_json(path=prefix_directory_join(PREFIX, name="desc.json", version=version), force=force)
@@ -133,7 +132,7 @@ def get_supplemental_url(version: str) -> str:
     return f"https://nlmpubs.nlm.nih.gov/projects/mesh/{version}/xmlmesh/supp{version}.gz"
-def ensure_mesh_supplemental_records(version: str, force: bool = False) -> List[Mapping[str, Any]]:
+def ensure_mesh_supplemental_records(version: str, force: bool = False) -> list[Mapping[str, Any]]:
     """Get the parsed MeSH dictionary, and cache it if it wasn't already."""
     @cached_json(path=prefix_directory_join(PREFIX, name="supp.json", version=version), force=force)
@@ -147,11 +146,11 @@ def ensure_mesh_supplemental_records(version: str, force: bool = False) -> List[
     return _inner()
-def get_descriptor_records(element: Element, id_key: str, name_key) -> List[Dict[str, Any]]:
+def get_descriptor_records(element: Element, id_key: str, name_key) -> list[dict[str, Any]]:
     """Get MeSH descriptor records."""
     logger.info("extract MeSH descriptors, concepts, and terms")
-    rv: List[Dict[str, Any]] = [
+    rv: list[dict[str, Any]] = [
         get_descriptor_record(descriptor, id_key=id_key, name_key=name_key)
         for descriptor in tqdm(element, desc="Getting MeSH Descriptors", unit_scale=True)
     ]
@@ -204,7 +203,7 @@ def get_descriptor_record(
     element: Element,
     id_key: str,
     name_key: str,
-) -> Dict[str, Any]:
+) -> dict[str, Any]:
     """Get descriptor records from the main element.
     :param element: An XML element
@@ -228,13 +227,13 @@ def get_descriptor_record(
     return rv
-def get_concept_records(element: Element) -> List[Mapping[str, Any]]:
+def get_concept_records(element: Element) -> list[Mapping[str, Any]]:
     """Get concepts from a record."""
     return [get_concept_record(e) for e in element.findall("ConceptList/Concept")]
-def _get_xrefs(element: Element) -> List[Tuple[str, str]]:
-    raw_registry_numbers: List[str] = sorted(
+def _get_xrefs(element: Element) -> list[tuple[str, str]]:
+    raw_registry_numbers: list[str] = sorted(
         {e.text for e in element.findall("RelatedRegistryNumberList/RegistryNumber") if e.text}
     )
     registry_number = element.findtext("RegistryNumber")
@@ -267,7 +266,7 @@ def get_concept_record(element: Element) -> Mapping[str, Any]:
     if scope_note is not None:
         scope_note = scope_note.replace("\\n", "\n").strip()
-    rv: Dict[str, Any] = {
+    rv: dict[str, Any] = {
         "concept_ui": element.findtext("ConceptUI"),
         "name": element.findtext("ConceptName/String"),
         "terms": get_term_records(element),
@@ -286,7 +285,7 @@ def get_concept_record(element: Element) -> Mapping[str, Any]:
     return rv
-def get_term_records(element: Element) -> List[Mapping[str, Any]]:
+def get_term_records(element: Element) -> list[Mapping[str, Any]]:
     """Get all of the terms for a concept."""
     return [get_term_record(term) for term in element.findall("TermList/Term")]
@@ -307,7 +306,7 @@ def _text_or_bust(element: Element, name: str) -> str:
     return n
-def _get_descriptor_qualifiers(descriptor: Element) -> List[Mapping[str, str]]:
+def _get_descriptor_qualifiers(descriptor: Element) -> list[Mapping[str, str]]:
     return [
         {
             "qualifier_ui": _text_or_bust(qualifier, "QualifierUI"),
@@ -321,7 +320,7 @@ def _get_descriptor_qualifiers(descriptor: Element) -> List[Mapping[str, str]]:
 def get_mesh_category_curies(
     letter: str, *, skip: Optional[Collection[str]] = None, version: Optional[str] = None
-) -> List[str]:
+) -> list[str]:
     """Get the MeSH LUIDs for a category, by letter (e.g., "A").
     :param letter: The MeSH tree, A for anatomy, C for disease, etc.
@@ -332,8 +331,7 @@ def get_mesh_category_curies(
     .. seealso:: https://meshb.nlm.nih.gov/treeView
     """
     if version is None:
-        version = get_version("mesh")
-        assert version is not None
+        version = safe_get_version("mesh")
     tree_to_mesh = get_tree_to_mesh_id(version=version)
     rv = []
     for i in range(1, 100):

pyobo/sources/mgi.py CHANGED Viewed

@@ -1,10 +1,8 @@
-# -*- coding: utf-8 -*-
 """Converter for MGI."""
 import logging
 from collections import defaultdict
-from typing import Iterable
+from collections.abc import Iterable
 import pandas as pd
 from tqdm.auto import tqdm

pyobo/sources/mirbase.py CHANGED Viewed

@@ -1,10 +1,8 @@
-# -*- coding: utf-8 -*-
 """Converter for miRBase."""
 import gzip
 import logging
-from typing import Iterable, List, Mapping
+from collections.abc import Iterable, Mapping
 from tqdm.auto import tqdm
@@ -48,7 +46,7 @@ def get_obo(force: bool = False) -> Obo:
     return MiRBaseGetter(force=force)
-def get_terms(version: str, force: bool = False) -> List[Term]:
+def get_terms(version: str, force: bool = False) -> list[Term]:
     """Parse miRNA data from filepath and convert it to dictionary."""
     _assert_frozen_version(version)
     url = f"{BASE_URL}/miRNA.dat.gz"
@@ -77,7 +75,7 @@ def _prepare_organisms(version: str, force: bool = False):
     return {division: (taxonomy_id, name) for _, division, name, _tree, taxonomy_id in df.values}
-def _prepare_aliases(version: str, force: bool = False) -> Mapping[str, List[str]]:
+def _prepare_aliases(version: str, force: bool = False) -> Mapping[str, list[str]]:
     _assert_frozen_version(version)
     url = f"{BASE_URL}/aliases.txt.gz"
     df = ensure_df(PREFIX, url=url, sep="\t", version=version, force=force)
@@ -94,7 +92,7 @@ def _process_definitions_lines(
     organisms = _prepare_organisms(version, force=force)
     aliases = _prepare_aliases(version, force=force)
-    groups: List[List[str]] = []
+    groups: list[list[str]] = []
     for line in lines:  # TODO replace with itertools.groupby
         if line.startswith("ID"):
@@ -138,9 +136,15 @@ def _process_definitions_lines(
             xref_prefix = xref_mapping.get(xref_prefix, xref_prefix)
             if xref_prefix == "pictar":
                 continue
-            xrefs.append(
-                Reference(prefix=xref_prefix, identifier=xref_identifier, name=xref_label or None)
-            )
+            try:
+                xref = Reference(
+                    prefix=xref_prefix, identifier=xref_identifier, name=xref_label or None
+                )
+            except ValueError:
+                tqdm.write(f"invalid xref: {xref_prefix}:{xref_identifier}")
+            else:
+                xrefs.append(xref)
         # TODO add pubmed references

pyobo/sources/mirbase_constants.py CHANGED Viewed

@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
 """Constants for miRBase."""
 import pandas as pd

pyobo/sources/mirbase_family.py CHANGED Viewed

@@ -1,8 +1,6 @@
-# -*- coding: utf-8 -*-
 """Converter for miRBase Families."""
-from typing import Iterable
+from collections.abc import Iterable
 import pandas as pd
 from tqdm.auto import tqdm

pyobo/sources/mirbase_mature.py CHANGED Viewed

@@ -1,8 +1,6 @@
-# -*- coding: utf-8 -*-
 """Converter for miRBase Mature."""
-from typing import Iterable
+from collections.abc import Iterable
 import pandas as pd
 from tqdm.auto import tqdm

pyobo/sources/msigdb.py CHANGED Viewed

@@ -1,11 +1,10 @@
-# -*- coding: utf-8 -*-
 """Parsers for MSig."""
 import logging
-from typing import Iterable, Optional
-from xml.etree import ElementTree
+from collections.abc import Iterable
+from typing import Optional
+from lxml.etree import ElementTree
 from tqdm.auto import tqdm
 from ..struct import Obo, Reference, Term, has_participant
@@ -137,7 +136,7 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
 def _get_definition(attrib) -> Optional[str]:
     rv = attrib["DESCRIPTION_FULL"].strip() or attrib["DESCRIPTION_BRIEF"].strip() or None
     if rv is not None:
-        return rv.replace(r"\d", "").replace(r"\s", "")  # noqa: W605
+        return rv.replace(r"\d", "").replace(r"\s", "")
     return None

pyobo/sources/ncbigene.py CHANGED Viewed

@@ -1,9 +1,7 @@
-# -*- coding: utf-8 -*-
 """Converter for Entrez."""
 import logging
-from typing import Iterable, List, Mapping, Set
+from collections.abc import Iterable, Mapping
 import bioregistry
 import pandas as pd
@@ -47,7 +45,7 @@ GENE_INFO_COLUMNS = [
 ]
-def get_ncbigene_ids() -> Set[str]:
+def get_ncbigene_ids() -> set[str]:
     """Get the Entrez name mapping."""
     df = _get_ncbigene_subset(["GeneID"])
     return set(df["GeneID"])
@@ -68,7 +66,7 @@ def _get_ncbigene_info_subset(usecols) -> Mapping[str, str]:
     return dict(df.values)
-def _get_ncbigene_subset(usecols: List[str]) -> pd.DataFrame:
+def _get_ncbigene_subset(usecols: list[str]) -> pd.DataFrame:
     df = ensure_df(
         PREFIX,
         url=GENE_INFO_URL,

pyobo/sources/npass.py CHANGED Viewed

@@ -1,9 +1,7 @@
-# -*- coding: utf-8 -*-
 """Converter for NPASS."""
 import logging
-from typing import Iterable
+from collections.abc import Iterable
 import pandas as pd
 from tqdm.auto import tqdm
@@ -41,7 +39,7 @@ def get_obo(force: bool = False) -> Obo:
 def get_df(version: str, force: bool = False) -> pd.DataFrame:
     """Get the NPASS chemical nomenclature."""
-    base_url = f"http://bidd.group/NPASS/downloadFiles/NPASSv{version}_download"
+    base_url = f"https://bidd.group/NPASS/downloadFiles/NPASSv{version}_download"
     url = f"{base_url}_naturalProducts_generalInfo.txt"
     return ensure_df(
         PREFIX,

pyobo/sources/omim_ps.py CHANGED Viewed

@@ -1,9 +1,7 @@
-# -*- coding: utf-8 -*-
 """Converter for OMIM Phenotypic Series."""
 import logging
-from typing import Iterable
+from collections.abc import Iterable
 from bioversions.utils import get_soup

pyobo/sources/pathbank.py CHANGED Viewed

@@ -1,16 +1,16 @@
-# -*- coding: utf-8 -*-
 """Converter for PathBank."""
+from __future__ import annotations
 import logging
 from collections import defaultdict
-from typing import Iterable, Mapping, Set
+from collections.abc import Iterable, Mapping
 import pandas as pd
 from tqdm.auto import tqdm
 from ..struct import Obo, Reference, Term
-from ..struct.typedef import has_participant
+from ..struct.typedef import has_category, has_participant
 from ..utils.path import ensure_df
 __all__ = [
@@ -70,7 +70,7 @@ class PathBankGetter(Obo):
     """An ontology representation of PathBank's pathway nomenclature."""
     ontology = bioversions_key = PREFIX
-    typedefs = [has_participant]
+    typedefs = [has_participant, has_category]
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
         """Iterate over terms in the ontology."""
@@ -98,46 +98,58 @@ def get_proteins_df(version: str, force: bool = False) -> pd.DataFrame:
     return proteins_df
-def get_protein_mapping(version: str, force: bool = False) -> Mapping[str, Set[Reference]]:
+def get_protein_mapping(version: str, force: bool = False) -> Mapping[str, set[Reference]]:
     """Make the protein mapping."""
     proteins_df = get_proteins_df(version=version, force=force)
     smpdb_id_to_proteins = defaultdict(set)
     for pathway_id, protein_id in tqdm(
         proteins_df.values, desc=f"[{PREFIX}] mapping proteins", unit_scale=True
     ):
-        # TODO get protein names
-        smpdb_id_to_proteins[pathway_id].add(Reference(prefix="uniprot", identifier=protein_id))
+        try:
+            if "-" in protein_id:
+                reference = Reference(prefix="uniprot.isoform", identifier=protein_id)
+            else:
+                reference = Reference(prefix="uniprot", identifier=protein_id)
+        except ValueError:
+            tqdm.write(f"[pathbank] invalid uniprot identifier: {protein_id}")
+        else:
+            smpdb_id_to_proteins[pathway_id].add(reference)
     return smpdb_id_to_proteins
 def get_metabolite_df(version: str, force: bool = False) -> pd.DataFrame:
     """Get the metabolites dataframe."""
-    return ensure_df(
+    df = ensure_df(
         PREFIX,
         url=METABOLITE_URL,
         sep=",",
-        usecols=["PathBank ID", "Metabolite ID", "Metabolite Name"],
+        usecols=["PathBank ID", "ChEBI ID"],
         force=force,
         version=version,
     )
+    df = df[df["ChEBI ID"].notna()]
+    return df
-def get_metabolite_mapping(version: str, force: bool = False) -> Mapping[str, Set[Reference]]:
+def get_metabolite_mapping(version: str, force: bool = False) -> Mapping[str, set[Reference]]:
     """Make the metabolite mapping."""
     metabolites_df = get_metabolite_df(version=version, force=force)
     smpdb_id_to_metabolites = defaultdict(set)
     it = tqdm(metabolites_df.values, desc=f"[{PREFIX}] mapping metabolites", unit_scale=True)
-    for pathway_id, metabolite_id, metabolite_name in it:
-        smpdb_id_to_metabolites[pathway_id].add(
-            Reference(
-                prefix=PREFIX,
-                identifier=metabolite_id,
-                name=metabolite_name,
-            )
-        )
+    for pathway_id, metabolite_id in it:
+        reference = Reference(prefix="chebi", identifier=metabolite_id.strip())
+        smpdb_id_to_metabolites[pathway_id].add(reference)
     return smpdb_id_to_metabolites
+def _clean_description(description: str) -> str | None:
+    """Clean the description."""
+    if pd.isna(description) or not description:
+        return None
+    parts = [part.strip() for part in description.strip().splitlines()]
+    return " ".join(parts)
 def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
     """Get PathBank's terms."""
     smpdb_id_to_proteins = get_protein_mapping(version=version, force=force)
@@ -149,16 +161,11 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
         reference = Reference(prefix=PREFIX, identifier=pathbank_id, name=name)
         term = Term(
             reference=reference,
-            # definition=description.replace('\n', ' '),
-            xrefs=[Reference(prefix="smpdb", identifier=smpdb_id)],
-        )
-        term.append_parent(
-            Reference(
-                prefix=PREFIX,
-                identifier=subject.lower().replace(" ", "_"),
-                name=subject,
-            )
+            # TODO use _clean_description(description) to add a description,
+            #  but there are weird parser errors
         )
+        term.append_exact_match(Reference(prefix="smpdb", identifier=smpdb_id))
+        term.append_property(has_category, subject.lower().replace(" ", "_"))
         term.extend_relationship(has_participant, smpdb_id_to_proteins[smpdb_id])
         term.extend_relationship(has_participant, smpdb_id_to_metabolites[smpdb_id])
         yield term

pyobo/sources/pfam.py CHANGED Viewed

@@ -1,8 +1,6 @@
-# -*- coding: utf-8 -*-
 """Convert PFAM to OBO."""
-from typing import Iterable
+from collections.abc import Iterable
 import pandas as pd

pyobo/sources/pfam_clan.py CHANGED Viewed

@@ -1,8 +1,6 @@
-# -*- coding: utf-8 -*-
 """Convert PFAM Clans to OBO."""
-from typing import Iterable
+from collections.abc import Iterable
 from tqdm.auto import tqdm

pyobo/sources/pid.py CHANGED Viewed

@@ -1,10 +1,8 @@
-# -*- coding: utf-8 -*-
 """Converter for NCI PID."""
 import logging
 from collections import defaultdict
-from typing import Iterable, List, Mapping, Tuple
+from collections.abc import Iterable, Mapping
 import pandas as pd
@@ -45,7 +43,7 @@ def get_obo() -> Obo:
     return PIDGetter()
-def iter_networks(use_tqdm: bool = False, force: bool = False) -> Iterable[Tuple[str, CX]]:
+def iter_networks(use_tqdm: bool = False, force: bool = False) -> Iterable[tuple[str, CX]]:
     """Iterate over NCI PID networks."""
     yield from ensure_ndex_network_set(
         PREFIX, NDEX_NETWORK_SET_UUID, use_tqdm=use_tqdm, force=force
@@ -117,7 +115,7 @@ def get_curation_df() -> pd.DataFrame:
     return df[["Text from NDEx", "Type", "Namespace", "Identifier"]]
-def get_remapping() -> Mapping[str, List[Tuple[str, str]]]:
+def get_remapping() -> Mapping[str, list[tuple[str, str]]]:
     """Get a mapping from text to list of HGNC id/symbols."""
     curation_df = get_curation_df()
     rv = defaultdict(list)

pyobo/sources/pombase.py CHANGED Viewed

@@ -1,16 +1,15 @@
-# -*- coding: utf-8 -*-
 """Converter for PomBase."""
 import logging
 from collections import defaultdict
-from typing import Iterable
+from collections.abc import Iterable
 import pandas as pd
 from tqdm.auto import tqdm
 import pyobo
 from pyobo import Reference
+from pyobo.resources.so import get_so_name
 from pyobo.struct import Obo, Term, from_species, has_gene_product, orthologous
 from pyobo.utils.path import ensure_df
@@ -21,7 +20,7 @@ __all__ = [
 logger = logging.getLogger(__name__)
 PREFIX = "pombase"
-URL = "https://www.pombase.org/data/names_and_identifiers/gene_IDs_names_products.tsv"
+GENE_NAMES_URL = "https://www.pombase.org/data/names_and_identifiers/gene_IDs_names_products.tsv"
 ORTHOLOGS_URL = "https://www.pombase.org/data/orthologs/human-orthologs.txt.gz"
@@ -70,9 +69,11 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]:
             if hgnc_id is not None:
                 identifier_to_hgnc_ids[identifier].add(hgnc_id)
-    df = ensure_df(PREFIX, url=URL, force=force, header=None, version=version)
+    df = ensure_df(PREFIX, url=GENE_NAMES_URL, force=force, version=version)
     so = {
-        gtype: Reference.auto("SO", POMBASE_TO_SO[gtype])
+        gtype: Reference(
+            prefix="SO", identifier=POMBASE_TO_SO[gtype], name=get_so_name(POMBASE_TO_SO[gtype])
+        )
         for gtype in sorted(df[df.columns[6]].unique())
     }
     for _, reference in sorted(so.items()):

pyobo/sources/pubchem.py CHANGED Viewed

@@ -1,9 +1,8 @@
-# -*- coding: utf-8 -*-
 """Converter for PubChem Compound."""
 import logging
-from typing import Iterable, Mapping, Optional
+from collections.abc import Iterable, Mapping
+from typing import Optional
 import pandas as pd
 from bioregistry.utils import removeprefix

pyobo/sources/reactome.py CHANGED Viewed

@@ -1,11 +1,9 @@
-# -*- coding: utf-8 -*-
 """Converter for Reactome."""
 import logging
 from collections import defaultdict
+from collections.abc import Iterable, Mapping
 from functools import lru_cache
-from typing import Iterable, Mapping, Set
 import pandas as pd
 from tqdm.auto import tqdm
@@ -72,7 +70,9 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
     df["taxonomy_id"] = df["species"].map(get_ncbitaxon_id)
     terms = {}
-    it = tqdm(df.values, total=len(df.index), desc=f"mapping {PREFIX}")
+    it = tqdm(
+        df.values, total=len(df.index), desc=f"mapping {PREFIX}", unit_scale=True, unit="pathway"
+    )
     for reactome_id, name, species_name, taxonomy_id in it:
         terms[reactome_id] = term = Term(
             reference=Reference(prefix=PREFIX, identifier=reactome_id, name=name),
@@ -94,10 +94,21 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
         terms[child_id].append_parent(terms[parent_id])
     uniprot_pathway_df = ensure_participant_df(version=version, force=force)
-    for uniprot_id, reactome_id in tqdm(uniprot_pathway_df.values, total=len(uniprot_pathway_df)):
-        terms[reactome_id].append_relationship(
-            has_participant, Reference(prefix="uniprot", identifier=uniprot_id)
-        )
+    for uniprot_id, reactome_id in tqdm(
+        uniprot_pathway_df.values,
+        total=len(uniprot_pathway_df),
+        unit_scale=True,
+        unit="pathway-protein",
+    ):
+        if reactome_id not in terms:
+            tqdm.write(f"{reactome_id} appears in uniprot participants file but not pathways file")
+            continue
+        if "-" in uniprot_id:
+            reference = Reference(prefix="uniprot.isoform", identifier=uniprot_id)
+        else:
+            reference = Reference(prefix="uniprot", identifier=uniprot_id)
+        terms[reactome_id].append_relationship(has_participant, reference)
     chebi_pathway_url = f"https://reactome.org/download/{version}/ChEBI2Reactome_All_Levels.txt"
     chebi_pathway_df = ensure_df(
@@ -108,7 +119,15 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
         version=version,
         force=force,
     )
-    for chebi_id, reactome_id in tqdm(chebi_pathway_df.values, total=len(chebi_pathway_df)):
+    for chebi_id, reactome_id in tqdm(
+        chebi_pathway_df.values,
+        total=len(chebi_pathway_df),
+        unit_scale=True,
+        unit="pathway-chemical",
+    ):
+        if reactome_id not in terms:
+            tqdm.write(f"{reactome_id} appears in chebi participants file but not pathways file")
+            continue
         terms[reactome_id].append_relationship(
             has_participant, Reference(prefix="chebi", identifier=chebi_id)
         )
@@ -122,7 +141,7 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
 @lru_cache(maxsize=1)
-def get_protein_to_pathways() -> Mapping[str, Set[str]]:
+def get_protein_to_pathways() -> Mapping[str, set[str]]:
     """Get a mapping from proteins to the pathways they're in."""
     protein_to_pathways = defaultdict(set)
     x = get_id_multirelations_mapping("reactome", has_participant)
@@ -135,4 +154,4 @@ def get_protein_to_pathways() -> Mapping[str, Set[str]]:
 if __name__ == "__main__":
-    get_obo().write_default()
+    ReactomeGetter.cli()

pyobo/sources/rgd.py CHANGED Viewed

@@ -1,9 +1,8 @@
-# -*- coding: utf-8 -*-
 """Converter for RGD."""
 import logging
-from typing import Iterable, Optional
+from collections.abc import Iterable
+from typing import Optional
 import pandas as pd
 from tqdm.auto import tqdm
@@ -138,7 +137,7 @@ def get_terms(force: bool = False, version: Optional[str] = None) -> Iterable[Te
                         continue
                     if prefix == "uniprot":
                         term.append_relationship(
-                            has_gene_product, Reference.auto(prefix=prefix, identifier=xref_id)
+                            has_gene_product, Reference(prefix=prefix, identifier=xref_id)
                         )
                     elif prefix == "ensembl":
                         if xref_id.startswith("ENSMUSG") or xref_id.startswith("ENSRNOG"):

pyobo 0.10.12__py3-none-any.whl → 0.11.1__py3-none-any.whl

pyobo 0.10.12__py3-none-any.whl → 0.11.1__py3-none-any.whl