pyobo 0.10.7__py3-none-any.whl → 0.10.9__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.
pyobo/api/hierarchy.py CHANGED
@@ -168,14 +168,15 @@ def is_descendent(prefix, identifier, ancestor_prefix, ancestor_identifier) -> b
168
168
  @lru_cache()
169
169
  def get_descendants(
170
170
  prefix: str,
171
- identifier: str,
171
+ identifier: Optional[str] = None,
172
172
  include_part_of: bool = True,
173
173
  include_has_member: bool = False,
174
174
  use_tqdm: bool = False,
175
175
  force: bool = False,
176
176
  **kwargs,
177
177
  ) -> Optional[Set[str]]:
178
- """Get all of the descendants (children) of the term as CURIEs."""
178
+ """Get all the descendants (children) of the term as CURIEs."""
179
+ curie, prefix, identifier = _pic(prefix, identifier)
179
180
  hierarchy = get_hierarchy(
180
181
  prefix=prefix,
181
182
  include_has_member=include_has_member,
@@ -184,23 +185,32 @@ def get_descendants(
184
185
  force=force,
185
186
  **kwargs,
186
187
  )
187
- curie = f"{prefix}:{identifier}"
188
188
  if curie not in hierarchy:
189
189
  return None
190
190
  return nx.ancestors(hierarchy, curie) # note this is backwards
191
191
 
192
192
 
193
+ def _pic(prefix, identifier=None) -> Tuple[str, str, str]:
194
+ if identifier is None:
195
+ curie = prefix
196
+ prefix, identifier = prefix.split(":")
197
+ else:
198
+ curie = f"{prefix}:{identifier}"
199
+ return curie, prefix, identifier
200
+
201
+
193
202
  @lru_cache()
194
203
  def get_children(
195
204
  prefix: str,
196
- identifier: str,
205
+ identifier: Optional[str] = None,
197
206
  include_part_of: bool = True,
198
207
  include_has_member: bool = False,
199
208
  use_tqdm: bool = False,
200
209
  force: bool = False,
201
210
  **kwargs,
202
211
  ) -> Optional[Set[str]]:
203
- """Get all of the descendants (children) of the term as CURIEs."""
212
+ """Get all the descendants (children) of the term as CURIEs."""
213
+ curie, prefix, identifier = _pic(prefix, identifier)
204
214
  hierarchy = get_hierarchy(
205
215
  prefix=prefix,
206
216
  include_has_member=include_has_member,
@@ -209,7 +219,6 @@ def get_children(
209
219
  force=force,
210
220
  **kwargs,
211
221
  )
212
- curie = f"{prefix}:{identifier}"
213
222
  if curie not in hierarchy:
214
223
  return None
215
224
  return set(hierarchy.predecessors(curie))
@@ -228,14 +237,15 @@ def has_ancestor(prefix, identifier, ancestor_prefix, ancestor_identifier) -> bo
228
237
  @lru_cache()
229
238
  def get_ancestors(
230
239
  prefix: str,
231
- identifier: str,
240
+ identifier: Optional[str] = None,
232
241
  include_part_of: bool = True,
233
242
  include_has_member: bool = False,
234
243
  use_tqdm: bool = False,
235
244
  force: bool = False,
236
245
  **kwargs,
237
246
  ) -> Optional[Set[str]]:
238
- """Get all of the ancestors (parents) of the term as CURIEs."""
247
+ """Get all the ancestors (parents) of the term as CURIEs."""
248
+ curie, prefix, identifier = _pic(prefix, identifier)
239
249
  hierarchy = get_hierarchy(
240
250
  prefix=prefix,
241
251
  include_has_member=include_has_member,
@@ -244,7 +254,6 @@ def get_ancestors(
244
254
  force=force,
245
255
  **kwargs,
246
256
  )
247
- curie = f"{prefix}:{identifier}"
248
257
  if curie not in hierarchy:
249
258
  return None
250
259
  return nx.descendants(hierarchy, curie) # note this is backwards
@@ -252,7 +261,7 @@ def get_ancestors(
252
261
 
253
262
  def get_subhierarchy(
254
263
  prefix: str,
255
- identifier: str,
264
+ identifier: Optional[str] = None,
256
265
  include_part_of: bool = True,
257
266
  include_has_member: bool = False,
258
267
  use_tqdm: bool = False,
@@ -260,6 +269,7 @@ def get_subhierarchy(
260
269
  **kwargs,
261
270
  ) -> nx.DiGraph:
262
271
  """Get the subhierarchy for a given node."""
272
+ curie, prefix, identifier = _pic(prefix, identifier)
263
273
  hierarchy = get_hierarchy(
264
274
  prefix=prefix,
265
275
  include_has_member=include_has_member,
@@ -271,7 +281,7 @@ def get_subhierarchy(
271
281
  logger.info(
272
282
  "getting descendants of %s:%s ! %s", prefix, identifier, get_name(prefix, identifier)
273
283
  )
274
- curies = nx.ancestors(hierarchy, f"{prefix}:{identifier}") # note this is backwards
284
+ curies = nx.ancestors(hierarchy, curie) # note this is backwards
275
285
  logger.info("inducing subgraph")
276
286
  sg = hierarchy.subgraph(curies).copy()
277
287
  logger.info("subgraph has %d nodes/%d edges", sg.number_of_nodes(), sg.number_of_edges())
pyobo/api/properties.py CHANGED
@@ -28,14 +28,17 @@ logger = logging.getLogger(__name__)
28
28
 
29
29
 
30
30
  @wrap_norm_prefix
31
- def get_properties_df(prefix: str, *, force: bool = False) -> pd.DataFrame:
31
+ def get_properties_df(
32
+ prefix: str, *, force: bool = False, version: Optional[str] = None
33
+ ) -> pd.DataFrame:
32
34
  """Extract properties.
33
35
 
34
36
  :param prefix: the resource to load
35
37
  :param force: should the resource be re-downloaded, re-parsed, and re-cached?
36
38
  :returns: A dataframe with the properties
37
39
  """
38
- version = get_version(prefix)
40
+ if version is None:
41
+ version = get_version(prefix)
39
42
  path = prefix_cache_join(prefix, name="properties.tsv", version=version)
40
43
 
41
44
  @cached_df(path=path, dtype=str, force=force)
@@ -59,6 +62,7 @@ def get_filtered_properties_mapping(
59
62
  *,
60
63
  use_tqdm: bool = False,
61
64
  force: bool = False,
65
+ version: Optional[str] = None,
62
66
  ) -> Mapping[str, str]:
63
67
  """Extract a single property for each term as a dictionary.
64
68
 
@@ -68,7 +72,12 @@ def get_filtered_properties_mapping(
68
72
  :param force: should the resource be re-downloaded, re-parsed, and re-cached?
69
73
  :returns: A mapping from identifier to property value
70
74
  """
71
- version = get_version(prefix)
75
+ df = get_properties_df(prefix=prefix, force=force, version=version)
76
+ df = df[df["property"] == prop]
77
+ return dict(df[[f"{prefix}_id", "value"]].values)
78
+
79
+ if version is None:
80
+ version = get_version(prefix)
72
81
  path = prefix_cache_join(prefix, "properties", name=f"{prop}.tsv", version=version)
73
82
  all_properties_path = prefix_cache_join(prefix, name="properties.tsv", version=version)
74
83
 
@@ -95,6 +104,7 @@ def get_filtered_properties_multimapping(
95
104
  *,
96
105
  use_tqdm: bool = False,
97
106
  force: bool = False,
107
+ version: Optional[str] = None,
98
108
  ) -> Mapping[str, List[str]]:
99
109
  """Extract multiple properties for each term as a dictionary.
100
110
 
@@ -104,7 +114,8 @@ def get_filtered_properties_multimapping(
104
114
  :param force: should the resource be re-downloaded, re-parsed, and re-cached?
105
115
  :returns: A mapping from identifier to property values
106
116
  """
107
- version = get_version(prefix)
117
+ if version is None:
118
+ version = get_version(prefix)
108
119
  path = prefix_cache_join(prefix, "properties", name=f"{prop}.tsv", version=version)
109
120
  all_properties_path = prefix_cache_join(prefix, name="properties.tsv", version=version)
110
121
 
@@ -124,7 +135,7 @@ def get_filtered_properties_multimapping(
124
135
  return _mapping_getter()
125
136
 
126
137
 
127
- def get_property(prefix: str, identifier: str, prop: str) -> Optional[str]:
138
+ def get_property(prefix: str, identifier: str, prop: str, **kwargs) -> Optional[str]:
128
139
  """Extract a single property for the given entity.
129
140
 
130
141
  :param prefix: the resource to load
@@ -136,11 +147,13 @@ def get_property(prefix: str, identifier: str, prop: str) -> Optional[str]:
136
147
  >>> pyobo.get_property('chebi', '132964', 'http://purl.obolibrary.org/obo/chebi/smiles')
137
148
  "C1(=CC=C(N=C1)OC2=CC=C(C=C2)O[C@@H](C(OCCCC)=O)C)C(F)(F)F"
138
149
  """
139
- filtered_properties_mapping = get_filtered_properties_mapping(prefix=prefix, prop=prop)
150
+ filtered_properties_mapping = get_filtered_properties_mapping(
151
+ prefix=prefix, prop=prop, **kwargs
152
+ )
140
153
  return filtered_properties_mapping.get(identifier)
141
154
 
142
155
 
143
- def get_properties(prefix: str, identifier: str, prop: str) -> Optional[List[str]]:
156
+ def get_properties(prefix: str, identifier: str, prop: str, **kwargs) -> Optional[List[str]]:
144
157
  """Extract a set of properties for the given entity.
145
158
 
146
159
  :param prefix: the resource to load
@@ -149,7 +162,7 @@ def get_properties(prefix: str, identifier: str, prop: str) -> Optional[List[str
149
162
  :returns: Multiple values for the property. If only one is expected, use :func:`get_property`
150
163
  """
151
164
  filtered_properties_multimapping = get_filtered_properties_multimapping(
152
- prefix=prefix, prop=prop
165
+ prefix=prefix, prop=prop, **kwargs
153
166
  )
154
167
  return filtered_properties_multimapping.get(identifier)
155
168
 
@@ -161,6 +174,7 @@ def get_filtered_properties_df(
161
174
  *,
162
175
  use_tqdm: bool = False,
163
176
  force: bool = False,
177
+ version: Optional[str] = None,
164
178
  ) -> pd.DataFrame:
165
179
  """Extract a single property for each term.
166
180
 
@@ -170,7 +184,8 @@ def get_filtered_properties_df(
170
184
  :param force: should the resource be re-downloaded, re-parsed, and re-cached?
171
185
  :returns: A dataframe from identifier to property value. Columns are [<prefix>_id, value].
172
186
  """
173
- version = get_version(prefix)
187
+ if version is None:
188
+ version = get_version(prefix)
174
189
  path = prefix_cache_join(prefix, "properties", name=f"{prop}.tsv", version=version)
175
190
  all_properties_path = prefix_cache_join(prefix, name="properties.tsv", version=version)
176
191
 
pyobo/api/xrefs.py CHANGED
@@ -142,7 +142,9 @@ def get_sssom_df(
142
142
  df = get_xrefs_df(prefix=prefix, **kwargs)
143
143
  rows: List[Tuple[str, ...]] = []
144
144
  with logging_redirect_tqdm():
145
- for source_id, target_prefix, target_id in tqdm(df.values, unit="mapping", unit_scale=True):
145
+ for source_id, target_prefix, target_id in tqdm(
146
+ df.values, unit="mapping", unit_scale=True, desc=f"[{prefix}] SSSOM"
147
+ ):
146
148
  source = Reference(prefix=prefix, identifier=source_id)
147
149
  target = Reference(prefix=target_prefix, identifier=target_id)
148
150
 
pyobo/getters.py CHANGED
@@ -55,7 +55,7 @@ class UnhandledFormat(NoBuild):
55
55
 
56
56
  #: The following prefixes can not be loaded through ROBOT without
57
57
  #: turning off integrity checks
58
- REQUIRES_NO_ROBOT_CHECK = {"clo", "vo"}
58
+ REQUIRES_NO_ROBOT_CHECK = {"clo", "vo", "orphanet.ordo", "orphanet"}
59
59
 
60
60
 
61
61
  @wrap_norm_prefix
@@ -117,7 +117,7 @@ def get_ontology(
117
117
 
118
118
  ontology_format, path = _ensure_ontology_path(prefix, force=force, version=version)
119
119
  if path is None:
120
- raise NoBuild
120
+ raise NoBuild(prefix)
121
121
  elif ontology_format == "obo":
122
122
  pass # all gucci
123
123
  elif ontology_format == "owl":
pyobo/gilda_utils.py CHANGED
@@ -3,6 +3,7 @@
3
3
  """PyOBO's Gilda utilities."""
4
4
 
5
5
  import logging
6
+ from subprocess import CalledProcessError
6
7
  from typing import Iterable, List, Optional, Tuple, Type, Union
7
8
 
8
9
  import bioregistry
@@ -96,6 +97,7 @@ def get_grounder(
96
97
  versions: Union[None, str, Iterable[Union[str, None]]] = None,
97
98
  strict: bool = True,
98
99
  skip_obsolete: bool = False,
100
+ progress: bool = True,
99
101
  ) -> Grounder:
100
102
  """Get a Gilda grounder for the given prefix(es)."""
101
103
  unnamed = set() if unnamed is None else set(unnamed)
@@ -113,7 +115,7 @@ def get_grounder(
113
115
  raise ValueError
114
116
 
115
117
  terms: List[gilda.term.Term] = []
116
- for prefix, version in zip(prefixes, versions):
118
+ for prefix, version in zip(tqdm(prefixes, leave=False, disable=not progress), versions):
117
119
  try:
118
120
  p_terms = list(
119
121
  get_gilda_terms(
@@ -122,9 +124,10 @@ def get_grounder(
122
124
  version=version,
123
125
  strict=strict,
124
126
  skip_obsolete=skip_obsolete,
127
+ progress=progress,
125
128
  )
126
129
  )
127
- except NoBuild:
130
+ except (NoBuild, CalledProcessError):
128
131
  continue
129
132
  else:
130
133
  terms.extend(p_terms)
@@ -144,17 +147,21 @@ def _fast_term(
144
147
  name: str,
145
148
  status: str,
146
149
  organism: Optional[str] = None,
147
- ) -> gilda.term.Term:
148
- return gilda.term.Term(
149
- norm_text=normalize(text),
150
- text=text,
151
- db=prefix,
152
- id=identifier,
153
- entry_name=name,
154
- status=status,
155
- source=prefix,
156
- organism=organism,
157
- )
150
+ ) -> Optional[gilda.term.Term]:
151
+ try:
152
+ term = gilda.term.Term(
153
+ norm_text=normalize(text),
154
+ text=text,
155
+ db=prefix,
156
+ id=identifier,
157
+ entry_name=name,
158
+ status=status,
159
+ source=prefix,
160
+ organism=organism,
161
+ )
162
+ except ValueError:
163
+ return None
164
+ return term
158
165
 
159
166
 
160
167
  def get_gilda_terms(
@@ -164,17 +171,24 @@ def get_gilda_terms(
164
171
  version: Optional[str] = None,
165
172
  strict: bool = True,
166
173
  skip_obsolete: bool = False,
174
+ progress: bool = True,
167
175
  ) -> Iterable[gilda.term.Term]:
168
176
  """Get gilda terms for the given namespace."""
169
177
  id_to_name = get_id_name_mapping(prefix, version=version, strict=strict)
170
178
  id_to_species = get_id_species_mapping(prefix, version=version, strict=strict)
171
179
  obsoletes = get_obsolete(prefix, version=version, strict=strict) if skip_obsolete else set()
172
180
 
173
- it = tqdm(id_to_name.items(), desc=f"[{prefix}] mapping", unit_scale=True, unit="name")
181
+ it = tqdm(
182
+ id_to_name.items(),
183
+ desc=f"[{prefix}] mapping",
184
+ unit_scale=True,
185
+ unit="name",
186
+ disable=not progress,
187
+ )
174
188
  for identifier, name in it:
175
189
  if identifier in obsoletes:
176
190
  continue
177
- yield _fast_term(
191
+ term = _fast_term(
178
192
  text=name,
179
193
  prefix=prefix,
180
194
  identifier=identifier,
@@ -182,11 +196,17 @@ def get_gilda_terms(
182
196
  status="name",
183
197
  organism=id_to_species.get(identifier),
184
198
  )
199
+ if term is not None:
200
+ yield term
185
201
 
186
202
  id_to_synonyms = get_id_synonyms_mapping(prefix, version=version)
187
203
  if id_to_synonyms:
188
204
  it = tqdm(
189
- id_to_synonyms.items(), desc=f"[{prefix}] mapping", unit_scale=True, unit="synonym"
205
+ id_to_synonyms.items(),
206
+ desc=f"[{prefix}] mapping",
207
+ unit_scale=True,
208
+ unit="synonym",
209
+ disable=not progress,
190
210
  )
191
211
  for identifier, synonyms in it:
192
212
  if identifier in obsoletes:
@@ -195,7 +215,7 @@ def get_gilda_terms(
195
215
  for synonym in synonyms:
196
216
  if not synonym:
197
217
  continue
198
- yield _fast_term(
218
+ term = _fast_term(
199
219
  text=synonym,
200
220
  prefix=prefix,
201
221
  identifier=identifier,
@@ -203,13 +223,21 @@ def get_gilda_terms(
203
223
  status="synonym",
204
224
  organism=id_to_species.get(identifier),
205
225
  )
226
+ if term is not None:
227
+ yield term
206
228
 
207
229
  if identifiers_are_names:
208
- it = tqdm(get_ids(prefix), desc=f"[{prefix}] mapping", unit_scale=True, unit="id")
230
+ it = tqdm(
231
+ get_ids(prefix),
232
+ desc=f"[{prefix}] mapping",
233
+ unit_scale=True,
234
+ unit="id",
235
+ disable=not progress,
236
+ )
209
237
  for identifier in it:
210
238
  if identifier in obsoletes:
211
239
  continue
212
- yield _fast_term(
240
+ term = _fast_term(
213
241
  text=identifier,
214
242
  prefix=prefix,
215
243
  identifier=identifier,
@@ -217,3 +245,5 @@ def get_gilda_terms(
217
245
  status="name",
218
246
  organism=id_to_species.get(identifier),
219
247
  )
248
+ if term is not None:
249
+ yield term
pyobo/sources/__init__.py CHANGED
@@ -8,6 +8,7 @@ from .antibodyregistry import AntibodyRegistryGetter
8
8
  from .ccle import CCLEGetter
9
9
  from .cgnc import CGNCGetter
10
10
  from .chembl import ChEMBLCompoundGetter
11
+ from .civic_gene import CIVICGeneGetter
11
12
  from .complexportal import ComplexPortalGetter
12
13
  from .conso import CONSOGetter
13
14
  from .cpt import CPTGetter
@@ -38,6 +39,7 @@ from .mirbase_mature import MiRBaseMatureGetter
38
39
  from .msigdb import MSigDBGetter
39
40
  from .ncbigene import NCBIGeneGetter
40
41
  from .npass import NPASSGetter
42
+ from .omim_ps import OMIMPSGetter
41
43
  from .pathbank import PathBankGetter
42
44
  from .pfam import PfamGetter
43
45
  from .pfam_clan import PfamClanGetter
@@ -61,6 +63,7 @@ __all__ = [
61
63
  "AntibodyRegistryGetter",
62
64
  "CCLEGetter",
63
65
  "CGNCGetter",
66
+ "CIVICGeneGetter",
64
67
  "CONSOGetter",
65
68
  "CPTGetter",
66
69
  "CVXGetter",
@@ -94,6 +97,7 @@ __all__ = [
94
97
  "MiRBaseMatureGetter",
95
98
  "NCBIGeneGetter",
96
99
  "NPASSGetter",
100
+ "OMIMPSGetter",
97
101
  "PIDGetter",
98
102
  "PathBankGetter",
99
103
  "PfamClanGetter",
pyobo/sources/cgnc.py CHANGED
@@ -69,7 +69,7 @@ def get_terms(force: bool = False) -> Iterable[Term]:
69
69
  term = Term.from_triple(
70
70
  prefix=PREFIX,
71
71
  identifier=cgnc_id,
72
- name=name,
72
+ name=name if pd.notna(name) else None,
73
73
  )
74
74
  term.set_species(identifier="9031", name="Gallus gallus")
75
75
  if entrez_id and pd.notna(entrez_id):
pyobo/sources/chebi.py CHANGED
@@ -15,12 +15,14 @@ __all__ = [
15
15
  ]
16
16
 
17
17
 
18
- def get_chebi_id_smiles_mapping() -> Mapping[str, str]:
18
+ def get_chebi_id_smiles_mapping(**kwargs) -> Mapping[str, str]:
19
19
  """Get a mapping from ChEBI identifiers to SMILES.
20
20
 
21
21
  This is common enough that it gets its own function :)
22
22
  """
23
- return get_filtered_properties_mapping("chebi", "http://purl.obolibrary.org/obo/chebi/smiles")
23
+ return get_filtered_properties_mapping(
24
+ "chebi", "http://purl.obolibrary.org/obo/chebi/smiles", **kwargs
25
+ )
24
26
 
25
27
 
26
28
  def get_chebi_smiles_id_mapping() -> Mapping[str, str]:
@@ -0,0 +1,55 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ """Converter for CiVIC Genes."""
4
+
5
+ from typing import Iterable, Optional
6
+
7
+ import pandas as pd
8
+
9
+ from pyobo.struct import Obo, Reference, Term
10
+ from pyobo.utils.path import ensure_df
11
+
12
+ __all__ = [
13
+ "CIVICGeneGetter",
14
+ ]
15
+
16
+ PREFIX = "civic.gid"
17
+ URL = "https://civicdb.org/downloads/nightly/nightly-GeneSummaries.tsv"
18
+
19
+
20
+ def _sort(_o, t):
21
+ return int(t.identifier)
22
+
23
+
24
+ class CIVICGeneGetter(Obo):
25
+ """An ontology representation of CiVIC's gene nomenclature."""
26
+
27
+ bioversions_key = ontology = PREFIX
28
+ term_sort_key = _sort
29
+
30
+ def iter_terms(self, force: bool = False) -> Iterable[Term]:
31
+ """Iterate over gene terms for CiVIC."""
32
+ yield from get_terms(self.data_version, force=force)
33
+
34
+
35
+ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Term]:
36
+ """Get CIVIC terms."""
37
+ # if version is not None:
38
+ # version_dt: datetime.date = dateutil.parser.parse(version)
39
+ # else:
40
+ # version_dt: datetime.date = datetime.today()
41
+ # version = version_dt.strftime("01-%b-%Y")
42
+ # version is like 01-Feb-2024
43
+ url = f"https://civicdb.org/downloads/{version}/{version}-GeneSummaries.tsv"
44
+ df = ensure_df(prefix=PREFIX, url=url, sep="\t", force=force, dtype=str, version=version)
45
+ for identifier, _, name, entrez_id, description, _last_review, _flag in df.values:
46
+ term = Term(
47
+ reference=Reference(prefix=PREFIX, identifier=identifier, name=name),
48
+ definition=description if pd.notna(description) else None,
49
+ )
50
+ term.append_exact_match(Reference(prefix="ncbigene", identifier=entrez_id))
51
+ yield term
52
+
53
+
54
+ if __name__ == "__main__":
55
+ CIVICGeneGetter.cli()
pyobo/sources/cvx.py CHANGED
@@ -7,7 +7,7 @@ from typing import Iterable
7
7
 
8
8
  import pandas as pd
9
9
 
10
- from pyobo import Obo, Term
10
+ from pyobo import Obo, Reference, Term
11
11
 
12
12
  __all__ = [
13
13
  "CVXGetter",
@@ -28,6 +28,11 @@ class CVXGetter(Obo):
28
28
  return iter_terms()
29
29
 
30
30
 
31
+ # This got split, which it's not obvious how to deal with this
32
+ MANUAL_OBSOLETE = {"15"}
33
+ REPLACEMENTS = {"31": "85", "154": "86", "180": "13"}
34
+
35
+
31
36
  def iter_terms() -> Iterable[Term]:
32
37
  """Iterate over terms in CVX."""
33
38
  dd = defaultdict(set)
@@ -60,11 +65,22 @@ def iter_terms() -> Iterable[Term]:
60
65
  cvx_df[col] = cvx_df[col].map(lambda s: s.strip() if pd.notna(s) else s)
61
66
  terms = {}
62
67
  for cvx, short_name, full_name, notes, status, nonvaccine, _updated in cvx_df.values:
63
- term = Term.from_triple(PREFIX, cvx, full_name)
68
+ if cvx == "99":
69
+ continue # this is a placeholder
70
+
71
+ is_obsolete = cvx in MANUAL_OBSOLETE or (pd.notna(notes) and "do not use" in notes.lower())
72
+ term = Term(
73
+ reference=Reference(prefix=PREFIX, identifier=cvx, name=full_name),
74
+ is_obsolete=is_obsolete,
75
+ )
64
76
  if short_name != full_name:
65
77
  term.append_synonym(short_name)
66
78
  if pd.notna(notes):
67
79
  term.append_comment(notes)
80
+ if is_obsolete:
81
+ replacement_identifier = REPLACEMENTS.get(cvx)
82
+ if replacement_identifier:
83
+ term.append_replaced_by(Reference(prefix=PREFIX, identifier=replacement_identifier))
68
84
  if pd.notna(status):
69
85
  term.append_property("status", status)
70
86
  if pd.notna(nonvaccine):
pyobo/sources/famplex.py CHANGED
@@ -151,9 +151,11 @@ def _get_xref_df(version: str) -> Mapping[str, List[Reference]]:
151
151
  }
152
152
  xrefs_df[0] = xrefs_df[0].map(lambda s: ns_remapping.get(s, s))
153
153
  xrefs_df[1] = [
154
- bioregistry.standardize_identifier(xref_prefix, xref_identifier)
155
- if xref_prefix != "nextprot.family"
156
- else xref_identifier[len("FA:") :]
154
+ (
155
+ bioregistry.standardize_identifier(xref_prefix, xref_identifier)
156
+ if xref_prefix != "nextprot.family"
157
+ else xref_identifier[len("FA:") :]
158
+ )
157
159
  for xref_prefix, xref_identifier in xrefs_df[[0, 1]].values
158
160
  ]
159
161
 
pyobo/sources/mesh.py CHANGED
@@ -6,7 +6,7 @@ import datetime
6
6
  import itertools as itt
7
7
  import logging
8
8
  import re
9
- from typing import Any, Dict, Iterable, List, Mapping, Optional, Set, Tuple
9
+ from typing import Any, Collection, Dict, Iterable, List, Mapping, Optional, Set, Tuple
10
10
  from xml.etree.ElementTree import Element
11
11
 
12
12
  from tqdm.auto import tqdm
@@ -19,6 +19,7 @@ from pyobo.utils.path import ensure_path, prefix_directory_join
19
19
 
20
20
  __all__ = [
21
21
  "MeSHGetter",
22
+ "get_mesh_category_curies",
22
23
  ]
23
24
 
24
25
  logger = logging.getLogger(__name__)
@@ -317,5 +318,32 @@ def _get_descriptor_qualifiers(descriptor: Element) -> List[Mapping[str, str]]:
317
318
  ]
318
319
 
319
320
 
321
+ def get_mesh_category_curies(letter: str, skip: Optional[Collection[str]] = None) -> List[str]:
322
+ """Get the MeSH LUIDs for a category, by letter (e.g., "A").
323
+
324
+ :param letter: The MeSH tree, A for anatomy, C for disease, etc.
325
+ :param skip: An optional collection of MeSH tree codes to skip, such as "A03"
326
+ :returns: A list of MeSH CURIE strings for the top level of each MeSH tree.
327
+
328
+ .. seealso:: https://meshb.nlm.nih.gov/treeView
329
+ """
330
+ import bioversions
331
+
332
+ mesh_version = bioversions.get_version("mesh")
333
+ if mesh_version is None:
334
+ raise ValueError
335
+ tree_to_mesh = get_tree_to_mesh_id(mesh_version)
336
+ rv = []
337
+ for i in range(1, 100):
338
+ key = f"{letter}{i:02}"
339
+ if skip and key in skip:
340
+ continue
341
+ mesh_id = tree_to_mesh.get(key)
342
+ if mesh_id is None:
343
+ break
344
+ rv.append(f"mesh:{mesh_id}")
345
+ return rv
346
+
347
+
320
348
  if __name__ == "__main__":
321
349
  get_obo(force=True).write_default(force=True, write_obo=True)
pyobo/sources/ncbigene.py CHANGED
@@ -171,15 +171,17 @@ def get_terms(force: bool = False) -> Iterable[Term]:
171
171
  continue
172
172
  term = Term(
173
173
  reference=Reference(prefix=PREFIX, identifier=gene_id, name=symbol),
174
- definition=description,
174
+ definition=description if pd.notna(description) else None,
175
175
  )
176
176
  term.set_species(identifier=tax_id)
177
177
  if pd.notna(xref_curies):
178
178
  for xref_curie in xref_curies.split("|"):
179
179
  if xref_curie.startswith("EnsemblRapid"):
180
180
  continue
181
- if xref_curie.startswith("AllianceGenome"):
181
+ elif xref_curie.startswith("AllianceGenome"):
182
182
  xref_curie = xref_curie[len("xref_curie") :]
183
+ elif xref_curie.startswith("nome:WB:"):
184
+ xref_curie = xref_curie[len("nome:") :]
183
185
  xref_prefix, xref_id = bioregistry.parse_curie(xref_curie)
184
186
  if xref_prefix and xref_id:
185
187
  term.append_xref(Reference(prefix=xref_prefix, identifier=xref_id))
@@ -187,7 +189,7 @@ def get_terms(force: bool = False) -> Iterable[Term]:
187
189
  p = xref_curie.split(":")[0]
188
190
  if p not in warning_prefixes:
189
191
  warning_prefixes.add(p)
190
- tqdm.write(f"[{PREFIX}] unhandled xref prefix: {p}")
192
+ tqdm.write(f"[{PREFIX}] unhandled prefix in xref: {xref_curie}")
191
193
  yield term
192
194
 
193
195