pyobo 0.12.4__py3-none-any.whl → 0.12.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/.DS_Store +0 -0
- pyobo/__init__.py +6 -0
- pyobo/api/__init__.py +3 -0
- pyobo/api/embedding.py +118 -0
- pyobo/api/utils.py +0 -10
- pyobo/cli/cli.py +1 -6
- pyobo/cli/database.py +7 -1
- pyobo/constants.py +23 -0
- pyobo/getters.py +52 -35
- pyobo/identifier_utils/api.py +3 -1
- pyobo/sources/__init__.py +14 -1
- pyobo/sources/chembl/__init__.py +6 -0
- pyobo/sources/chembl/chembl_cell.py +94 -0
- pyobo/sources/chembl/chembl_mechanism.py +81 -0
- pyobo/sources/chembl/chembl_tissue.py +70 -0
- pyobo/sources/clinicaltrials.py +32 -33
- pyobo/sources/complexportal.py +5 -1
- pyobo/sources/drugcentral.py +2 -1
- pyobo/sources/hgnc/hgnc.py +13 -6
- pyobo/sources/iana_media_type.py +100 -0
- pyobo/sources/mesh.py +82 -29
- pyobo/sources/reactome.py +10 -3
- pyobo/sources/spdx.py +89 -0
- pyobo/sources/uniprot/uniprot.py +2 -2
- pyobo/sources/wikipathways.py +92 -7
- pyobo/struct/__init__.py +2 -0
- pyobo/struct/functional/dsl.py +10 -1
- pyobo/struct/functional/ontology.py +3 -3
- pyobo/struct/obo/reader.py +17 -53
- pyobo/struct/obograph/export.py +2 -2
- pyobo/struct/struct.py +125 -8
- pyobo/struct/struct_utils.py +10 -0
- pyobo/struct/typedef.py +15 -3
- pyobo/struct/vocabulary.py +8 -0
- pyobo/utils/cache.py +4 -3
- pyobo/utils/io.py +18 -56
- pyobo/utils/misc.py +142 -1
- pyobo/utils/path.py +34 -2
- pyobo/version.py +1 -1
- {pyobo-0.12.4.dist-info → pyobo-0.12.6.dist-info}/METADATA +11 -7
- {pyobo-0.12.4.dist-info → pyobo-0.12.6.dist-info}/RECORD +44 -38
- {pyobo-0.12.4.dist-info → pyobo-0.12.6.dist-info}/WHEEL +0 -0
- {pyobo-0.12.4.dist-info → pyobo-0.12.6.dist-info}/entry_points.txt +0 -0
- {pyobo-0.12.4.dist-info → pyobo-0.12.6.dist-info}/licenses/LICENSE +0 -0
pyobo/sources/chembl/chembl_mechanism.py
ADDED
@@ -0,0 +1,81 @@
+"""Converter for ChEMBL mechanisms."""
+
+import logging
+from collections.abc import Iterable
+
+import chembl_downloader
+
+from pyobo.struct import CHARLIE_TERM, PYOBO_INJECTED, Obo, Term
+from pyobo.struct.typedef import exact_match
+
+__all__ = [
+    "ChEMBLMechanismGetter",
+]
+
+logger = logging.getLogger(__name__)
+
+PREFIX = "chembl.mechanism"
+QUERY = "SELECT * from ACTION_TYPE"
+
+ROOT = (
+    Term.default(PREFIX, "mechanism", name="mechanism")
+    .append_contributor(CHARLIE_TERM)
+    .append_comment(PYOBO_INJECTED)
+)
+
+
+class ChEMBLMechanismGetter(Obo):
+    """An ontology representation of ChEMBL mechanisms."""
+
+    ontology = PREFIX
+    bioversions_key = "chembl"
+    typedefs = [exact_match]
+    root_terms = [ROOT.reference]
+
+    def iter_terms(self, force: bool = False) -> Iterable[Term]:
+        """Iterate over terms in the ontology."""
+        return iter_terms(version=self._version_or_raise)
+
+
+def normalize_chembl_mechanism(name: str) -> str:
+    """Normalize a ChEMBL mechanism name into an identifier."""
+    return name.lower().replace(" ", "-")
+
+
+def _norm_name(name: str) -> str:
+    return name.lower().replace("rnai ", "RNAi ")
+
+
+def get_pattern(version: str | None = None) -> str:
+    """Get a pattern."""
+    df = chembl_downloader.query("SELECT action_type from ACTION_TYPE", version=version)
+    parts = "|".join(sorted(normalize_chembl_mechanism(name) for (name,) in df.values))
+    return f"^[{parts}]$"
+
+
+def iter_terms(version: str) -> Iterable[Term]:
+    """Iterate over ChEMBL mechanisms."""
+    df = chembl_downloader.query(QUERY, version=version)
+    terms = {}
+    parents = {}
+    for name, _description, parent in df.values:
+        identifier = normalize_chembl_mechanism(name)
+        terms[name] = Term.from_triple(prefix=PREFIX, identifier=identifier, name=_norm_name(name))
+        if name != parent:  # protect against "other" which is a child of itself
+            parents[name] = parent
+    for child, parent in parents.items():
+        terms[child].append_parent(terms[parent])
+
+    # these are the three top-level things in the hierarchy, which
+    # we annotate onto a dummy parent term
+    for name in [
+        "POSITIVE MODULATOR",
+        "NEGATIVE MODULATOR",
+        "OTHER",
+    ]:
+        terms[name].append_parent(ROOT)
+    yield from terms.values()
+
+
+if __name__ == "__main__":
+    ChEMBLMechanismGetter.cli()
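The new getter builds its local identifiers directly from the ACTION_TYPE names. A minimal standalone sketch of that normalization, mirroring normalize_chembl_mechanism above (the example action-type names are illustrative, not queried from a ChEMBL release):

def normalize_chembl_mechanism(name: str) -> str:
    # Mirror of the helper in pyobo/sources/chembl/chembl_mechanism.py:
    # lowercase the action type and replace spaces with hyphens.
    return name.lower().replace(" ", "-")


# Assumed example names, printed as CURIEs in the new "chembl.mechanism" prefix
for name in ["POSITIVE MODULATOR", "CROSS-LINKING AGENT", "OTHER"]:
    print(f"{name!r} -> chembl.mechanism:{normalize_chembl_mechanism(name)}")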
pyobo/sources/chembl/chembl_tissue.py
ADDED
@@ -0,0 +1,70 @@
+"""Converter for ChEMBL tissues."""
+
+import logging
+from collections.abc import Iterable
+
+import chembl_downloader
+
+from pyobo.struct import Obo, Reference, Term
+from pyobo.struct.typedef import exact_match
+
+__all__ = [
+    "ChEMBLTissueGetter",
+]
+
+logger = logging.getLogger(__name__)
+
+PREFIX = "chembl.tissue"
+QUERY = """\
+SELECT
+    CHEMBL_ID,
+    PREF_NAME,
+    UBERON_ID,
+    EFO_ID,
+    BTO_ID,
+    CALOHA_ID
+FROM TISSUE_DICTIONARY
+"""
+
+
+class ChEMBLTissueGetter(Obo):
+    """An ontology representation of ChEMBL tissues."""
+
+    ontology = PREFIX
+    bioversions_key = "chembl"
+    typedefs = [exact_match]
+
+    def iter_terms(self, force: bool = False) -> Iterable[Term]:
+        """Iterate over terms in the ontology."""
+        return iter_terms(version=self._version_or_raise)
+
+
+def iter_terms(version: str | None = None) -> Iterable[Term]:
+    """Iterate over ChEMBL tissue terms."""
+    with chembl_downloader.cursor(version=version) as cursor:
+        cursor.execute(QUERY)
+        for chembl_id, name, uberon, efo, bto, caloha in cursor.fetchall():
+            term = Term(
+                reference=Reference(prefix=PREFIX, identifier=chembl_id, name=name),
+            )
+            if uberon:
+                term.append_exact_match(
+                    Reference(prefix="uberon", identifier=uberon.removeprefix("UBERON:"))
+                )
+            if efo:
+                term.append_exact_match(
+                    Reference(
+                        prefix="efo", identifier=efo.removeprefix("EFO:").removeprefix("EFO;")
+                    )
+                )
+            if bto:
+                term.append_exact_match(
+                    Reference(prefix="bto", identifier=bto.removeprefix("BTO:"))
+                )
+            if caloha:
+                term.append_exact_match(Reference(prefix="caloha", identifier=caloha))
+            yield term
+
+
+if __name__ == "__main__":
+    ChEMBLTissueGetter.cli()
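Each row of TISSUE_DICTIONARY carries optional UBERON/EFO/BTO/CALOHA cross-references whose CURIE prefixes are stripped before they become exact matches. A small standalone sketch of that cleanup (the example cell values are assumed, shaped like the columns above):

def split_tissue_xref(prefix: str, raw: str) -> tuple[str, str]:
    # Mirrors the removeprefix() calls above, e.g. "UBERON:0002107" -> ("uberon", "0002107");
    # the EFO column sometimes uses "EFO;" instead of "EFO:", hence two candidates.
    for junk in (f"{prefix.upper()}:", f"{prefix.upper()};"):
        raw = raw.removeprefix(junk)
    return prefix, raw


print(split_tissue_xref("uberon", "UBERON:0002107"))  # ('uberon', '0002107')
print(split_tissue_xref("efo", "EFO:0000853"))        # ('efo', '0000853')
print(split_tissue_xref("bto", "BTO:0000759"))        # ('bto', '0000759')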
pyobo/sources/clinicaltrials.py
CHANGED
@@ -27,49 +27,48 @@ HAS_INTERVENTION = TypeDef(
     is_metadata_tag=True,
 )
 
-
+INVESTIGATION_TERM = Term(
+    reference=Reference(prefix="obi", identifier="0000066", name="investigation")
+)
 
-
-    reference=
-).append_parent(
+OBSERVATIONAL_INVESTIGATION_TERM = Term(
+    reference=Reference(prefix="obi", identifier="0003693", name="observational investigation")
+).append_parent(INVESTIGATION_TERM)
 
-
-    reference=
-
-
-
+CLINICAL_INVESTIGATION_TERM = Term(
+    reference=Reference(prefix="obi", identifier="0003697", name="clinical investigation")
+).append_parent(INVESTIGATION_TERM)
+
+CLINICAL_TRIAL_TERM = Term(
+    reference=Reference(prefix="obi", identifier="0003699", name="clinical trial")
+).append_parent(CLINICAL_INVESTIGATION_TERM)
 
 RANDOMIZED_INTERVENTIONAL_CLINICAL_TRIAL_TERM = Term(
-    reference=
-
-    "
-    name="randomized
+    reference=Reference(
+        prefix="obi",
+        identifier="0004001",
+        name="randomized clinical trial",
     )
-).append_parent(
+).append_parent(CLINICAL_TRIAL_TERM)
 
 NON_RANDOMIZED_INTERVENTIONAL_CLINICAL_TRIAL_TERM = Term(
-    reference=
-
-    "
-    name="non-randomized
-    )
-).append_parent(INTERVENTIONAL_CLINICAL_TRIAL_TERM)
-
-OBSERVATIONAL_CLINICAL_TRIAL_TERM = Term(
-    reference=default_reference(
-        PREFIX, "observational-clinical-trial", name="observational clinical trial"
+    reference=Reference(
+        prefix="obi",
+        identifier="0004002",
+        name="non-randomized clinical trial",
    )
 ).append_parent(CLINICAL_TRIAL_TERM)
 
+# TODO request OBI term
 EXPANDED_ACCESS_STUDY_TERM = Term(
     reference=default_reference(PREFIX, "expanded-access-study", name="expanded access study")
-).append_parent(
+).append_parent(INVESTIGATION_TERM)
 
 TERMS = [
-
+    INVESTIGATION_TERM,
+    CLINICAL_INVESTIGATION_TERM,
+    OBSERVATIONAL_INVESTIGATION_TERM,
     CLINICAL_TRIAL_TERM,
-    OBSERVATIONAL_CLINICAL_TRIAL_TERM,
-    INTERVENTIONAL_CLINICAL_TRIAL_TERM,
     EXPANDED_ACCESS_STUDY_TERM,
     RANDOMIZED_INTERVENTIONAL_CLINICAL_TRIAL_TERM,
     NON_RANDOMIZED_INTERVENTIONAL_CLINICAL_TRIAL_TERM,
@@ -79,13 +78,13 @@ TERMS = [
 # types in ClinicalTrials.gov. See summary script at
 # https://gist.github.com/cthoyt/12a3cb3c63ad68d73fe5a2f0d506526f
 PARENTS: dict[tuple[str | None, str | None], Term] = {
-    ("INTERVENTIONAL", None):
-    ("INTERVENTIONAL", "NA"):
+    ("INTERVENTIONAL", None): CLINICAL_TRIAL_TERM,
+    ("INTERVENTIONAL", "NA"): CLINICAL_TRIAL_TERM,
     ("INTERVENTIONAL", "RANDOMIZED"): RANDOMIZED_INTERVENTIONAL_CLINICAL_TRIAL_TERM,
     ("INTERVENTIONAL", "NON_RANDOMIZED"): NON_RANDOMIZED_INTERVENTIONAL_CLINICAL_TRIAL_TERM,
-    ("OBSERVATIONAL", None):
+    ("OBSERVATIONAL", None): OBSERVATIONAL_INVESTIGATION_TERM,
     ("EXPANDED_ACCESS", None): EXPANDED_ACCESS_STUDY_TERM,
-    (None, None):
+    (None, None): INVESTIGATION_TERM,
 }
 
 
@@ -95,7 +94,7 @@ class ClinicalTrialsGetter(Obo):
     ontology = PREFIX
     dynamic_version = True
     typedefs = [has_contributor, INVESTIGATES_CONDITION, HAS_INTERVENTION]
-    root_terms = [
+    root_terms = [INVESTIGATION_TERM.reference]
 
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
         """Iterate over terms for studies."""
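The reworked PARENTS table keys each study on its (study_type, allocation) pair and now resolves to OBI classes rather than ontology-local terms. A rough standalone sketch of that lookup with plain CURIE strings; the fallback to the generic investigation root is an assumption for illustration, and the expanded-access entry (which stays an ontology-local term) is omitted:

PARENT_CURIES: dict[tuple[str | None, str | None], str] = {
    # CURIEs copied from the diff above
    ("INTERVENTIONAL", None): "obi:0003699",              # clinical trial
    ("INTERVENTIONAL", "NA"): "obi:0003699",
    ("INTERVENTIONAL", "RANDOMIZED"): "obi:0004001",      # randomized clinical trial
    ("INTERVENTIONAL", "NON_RANDOMIZED"): "obi:0004002",  # non-randomized clinical trial
    ("OBSERVATIONAL", None): "obi:0003693",               # observational investigation
    (None, None): "obi:0000066",                          # investigation
}


def get_parent_curie(study_type: str | None, allocation: str | None) -> str:
    # Unmapped pairs fall back to the generic "investigation" root
    return PARENT_CURIES.get((study_type, allocation), PARENT_CURIES[(None, None)])


print(get_parent_curie("INTERVENTIONAL", "RANDOMIZED"))  # obi:0004001
print(get_parent_curie("OBSERVATIONAL", None))           # obi:0003693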
pyobo/sources/complexportal.py
CHANGED
@@ -57,6 +57,7 @@ SPECIES = [
 DTYPE = {
     "taxonomy_id": str,
 }
+ROOT = Reference(prefix="go", identifier="0032991", name="macromolecular complex")
 
 
 def _parse_members(s) -> list[tuple[Reference, str]]:
@@ -157,10 +158,12 @@ class ComplexPortalGetter(Obo):
 
     bioversions_key = ontology = PREFIX
     typedefs = [from_species, has_part, has_citation]
+    root_terms = [ROOT]
 
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
         """Iterate over terms in the ontology."""
-
+        yield Term(reference=ROOT)
+        yield from get_terms(version=self._version_or_raise)
 
 
 def get_df(version: str, force: bool = False) -> pd.DataFrame:
@@ -232,6 +235,7 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]:
             definition=definition.strip() if pd.notna(definition) else None,
             synonyms=[Synonym(name=alias) for alias in aliases],
         )
+        term.append_parent(ROOT)
        for reference, note in xrefs:
            if note == "identity":
                term.append_xref(reference)
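ComplexPortal terms are now rooted under a single GO class: the root is emitted once as its own term and every complex gets it as a parent. A minimal sketch of that pattern with pyobo's structures, using the same calls that appear in the diff (the CPX accession is an arbitrary example):

from pyobo.struct import Reference, Term

# Same root reference as introduced above
ROOT = Reference(prefix="go", identifier="0032991", name="macromolecular complex")

root_term = Term(reference=ROOT)  # yielded once by iter_terms()
complex_term = Term(reference=Reference(prefix="complexportal", identifier="CPX-1"))
complex_term.append_parent(ROOT)  # mirrors term.append_parent(ROOT) in get_terms()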
pyobo/sources/drugcentral.py
CHANGED
@@ -6,7 +6,6 @@ from collections.abc import Iterable
 from contextlib import closing
 
 import bioregistry
-import psycopg2
 from pydantic import ValidationError
 from tqdm.auto import tqdm
 
@@ -42,6 +41,8 @@ class DrugCentralGetter(Obo):
 
 def iter_terms() -> Iterable[Term]:
     """Iterate over DrugCentral terms."""
+    import psycopg2
+
     with closing(psycopg2.connect(**PARAMS)) as conn:
         with closing(conn.cursor()) as cur:
             cur.execute(
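Moving import psycopg2 from module level into iter_terms() makes the PostgreSQL driver a soft dependency: the module can still be imported (for example, when pyobo enumerates its sources) without psycopg2 installed, and the import only runs when DrugCentral terms are actually requested. A generic sketch of this deferred-import pattern; the function name, connection details, and query are illustrative:

from collections.abc import Iterable


def iter_rows_sketch() -> Iterable[tuple]:
    # Deferred import: the driver is resolved only when the generator runs,
    # so merely importing this module does not require psycopg2.
    import psycopg2

    with psycopg2.connect(dbname="drugcentral") as conn:  # assumed connection parameters
        with conn.cursor() as cur:
            cur.execute("SELECT id, name FROM structures")  # illustrative query
            yield from cur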
pyobo/sources/hgnc/hgnc.py
CHANGED
@@ -7,6 +7,7 @@ import typing
 from collections import Counter, defaultdict
 from collections.abc import Iterable
 
+import pydantic
 from tabulate import tabulate
 from tqdm.auto import tqdm
 
@@ -280,7 +281,7 @@ def get_terms(version: str | None = None, force: bool = False) -> Iterable[Term]
                continue  # only add concrete annotations
            term.append_relationship(
                gene_product_member_of,
-                Reference(prefix="ec", identifier=ec_code),
+                Reference(prefix="ec", identifier=ec_code.strip()),
            )
        for rna_central_ids in entry.pop("rna_central_id", []):
            for rna_central_id in rna_central_ids.split(","):
@@ -314,7 +315,7 @@ def get_terms(version: str | None = None, force: bool = False) -> Iterable[Term]
            )
        for mgi_curie in entry.pop("mgd_id", []):
            if not mgi_curie.startswith("MGI:"):
-                tqdm.write(f"hgnc:{identifier} had bad MGI CURIE: {mgi_curie}")
+                tqdm.write(f"[hgnc:{identifier}] had bad MGI CURIE: {mgi_curie}")
                continue
            mgi_id = mgi_curie[len("MGI:") :]
            if not mgi_id:
@@ -335,7 +336,7 @@ def get_terms(version: str | None = None, force: bool = False) -> Iterable[Term]
                Reference(prefix="iuphar.ligand", identifier=iuphar[len("ligandId:") :])
            )
        else:
-            tqdm.write(f"unhandled IUPHAR: {iuphar}")
+            tqdm.write(f"[hgnc:{identifier}] unhandled IUPHAR: {iuphar}")
 
        for lrg_info in entry.pop("lsdb", []):
            if lrg_info.startswith("LRG_"):
@@ -360,9 +361,15 @@ def get_terms(version: str | None = None, force: bool = False) -> Iterable[Term]
        xref_identifiers = [i.strip(".") for i in xref_identifiers]
 
        if len(xref_identifiers) == 1:
-
-                Reference(prefix=xref_prefix, identifier=str(xref_identifiers[0]))
-
+            try:
+                xref = Reference(prefix=xref_prefix, identifier=str(xref_identifiers[0]))
+            except pydantic.ValidationError:
+                tqdm.write(
+                    f"[hgnc:{identifier}] had bad {key} xref: {xref_prefix}:{xref_identifiers[0]}"
+                )
+                continue
+            else:
+                term.append_exact_match(xref)
        else:
            for xref_identifier in xref_identifiers:
                term.append_xref(Reference(prefix=xref_prefix, identifier=str(xref_identifier)))
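The xref block now wraps Reference construction in try/except so that a malformed identifier is logged and skipped instead of aborting the HGNC export. A self-contained sketch of the try/except/else pattern; the model and its validation rule are invented for illustration and are not pyobo's Reference:

from pydantic import BaseModel, ValidationError, field_validator


class XrefSketch(BaseModel):
    prefix: str
    identifier: str

    @field_validator("identifier")
    @classmethod
    def _no_whitespace(cls, value: str) -> str:
        if not value or " " in value:
            raise ValueError("identifier must be non-empty and contain no spaces")
        return value


for raw in ["601739", "not a valid id"]:
    try:
        xref = XrefSketch(prefix="omim", identifier=raw)
    except ValidationError:
        print(f"skipping bad xref: omim:{raw}")  # analogous to the tqdm.write() above
    else:
        print(f"keeping {xref.prefix}:{xref.identifier}")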
pyobo/sources/iana_media_type.py
ADDED
@@ -0,0 +1,100 @@
+"""An ontology representation of IANA media types (i.e. MIME types).
+
+.. seealso:: https://www.iana.org/assignments/media-types/media-types.xhtml
+"""
+
+from collections.abc import Iterable
+
+from pyobo import Obo, Reference, Term, default_reference
+from pyobo.struct.typedef import term_replaced_by
+from pyobo.utils.path import ensure_df
+
+__all__ = ["IANAGetter"]
+
+PREFIX = "iana.mediatype"
+ROOT = Term.from_triple(prefix="dcterms", identifier="MediaType", name="media type")
+
+#: The top-level types listed on https://www.iana.org/assignments/media-types/media-types.xhtml
+MEDIA_TYPE_GROUPS = [
+    "application",
+    "audio",
+    "font",
+    "haptics",
+    "image",
+    "message",
+    "model",
+    "multipart",
+    "text",
+    "video",
+]
+
+GROUP_TO_CSV = {
+    media_type_group: (
+        f"https://www.iana.org/assignments/media-types/{media_type_group}.csv",
+        Term(reference=default_reference(PREFIX, media_type_group, media_type_group)).append_parent(
+            ROOT
+        ),
+    )
+    for media_type_group in MEDIA_TYPE_GROUPS
+}
+
+
+class IANAGetter(Obo):
+    """An ontology representation of IANA media types (i.e. MIME types)."""
+
+    ontology = bioregistry_key = PREFIX
+    name = "IANA Media Types"
+    dynamic_version = True
+    root_terms = [t.reference for _, (_, t) in sorted(GROUP_TO_CSV.items())]
+    typedefs = [
+        term_replaced_by,
+    ]
+
+    def iter_terms(self, force: bool = False) -> Iterable[Term]:
+        """Iterate over terms in the ontology."""
+        return get_terms()
+
+
+def get_terms() -> list[Term]:
+    """Get IANA Media Type terms."""
+    terms: dict[str, Term] = {}
+    forwards: dict[Term, str] = {}
+    for key, (url, parent) in GROUP_TO_CSV.items():
+        df = ensure_df(PREFIX, url=url, sep=",")
+        terms[key] = parent
+        for name, identifier, references in df.values:
+            if "OBSOLE" in name or "DEPRECATED" in name:
+                is_obsolete = True
+            else:
+                is_obsolete = None
+            term = Term(
+                reference=Reference(prefix=PREFIX, identifier=identifier, name=name),
+                is_obsolete=is_obsolete,
+            ).append_parent(parent)
+            for reference in _process_references(references):
+                term.append_see_also_uri(reference)
+            terms[identifier.casefold()] = term
+
+            if "in favor of" in name:
+                _, _, new = name.partition("in favor of ")
+                forwards[term] = new.casefold().strip().rstrip(")").strip()
+
+    for old, new in forwards.items():
+        if new == "vnd.afpc.afplinedata":
+            new = "application/vnd.afpc.afplinedata"
+        old.append_replaced_by(terms[new].reference)
+
+    return list(terms.values())
+
+
+def _process_references(cell: str) -> list[str]:
+    rv = []
+    for part in cell.split("]["):
+        part = part.strip("[").strip("]")
+        if part.startswith("RFC"):
+            rv.append(f"https://www.iana.org/go/rfc{part.removeprefix('RFC')}")
+    return rv
+
+
+if __name__ == "__main__":
+    IANAGetter.cli()
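IANA's per-group CSVs pack citations like [RFC2046][RFC6657] into a single reference cell; _process_references keeps the RFC entries and rewrites them as iana.org redirect URLs that become see-also links on each term. A standalone mirror of that parsing (the example cell is assumed):

def process_references(cell: str) -> list[str]:
    # Mirrors _process_references() above: split the bracketed citations
    # and keep only RFCs, rewritten as https://www.iana.org/go/rfcNNNN URLs.
    rv = []
    for part in cell.split("]["):
        part = part.strip("[").strip("]")
        if part.startswith("RFC"):
            rv.append(f"https://www.iana.org/go/rfc{part.removeprefix('RFC')}")
    return rv


print(process_references("[RFC2046][RFC6657]"))
# ['https://www.iana.org/go/rfc2046', 'https://www.iana.org/go/rfc6657']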
pyobo/sources/mesh.py
CHANGED
@@ -10,12 +10,12 @@ from pathlib import Path
 from typing import Any
 from xml.etree.ElementTree import Element
 
+import bioversions
 from lxml import etree
 from tqdm.auto import tqdm
 
-from pyobo.api.utils import safe_get_version
 from pyobo.identifier_utils import standardize_ec
-from pyobo.struct import Obo, Reference, Synonym, Term
+from pyobo.struct import Obo, Reference, Synonym, Term, default_reference
 from pyobo.utils.cache import cached_json, cached_mapping
 from pyobo.utils.path import ensure_path, prefix_directory_join
 
@@ -31,6 +31,37 @@ PREFIX = "mesh"
 NOW_YEAR = str(datetime.datetime.now().year)
 CAS_RE = re.compile(r"^\d{1,7}\-\d{2}\-\d$")
 UNII_RE = re.compile(r"[0-9A-Za-z]{10}$")
+SUPPLEMENT_PARENT = default_reference(
+    prefix=PREFIX, identifier="supplemental-record", name="supplemental records"
+)
+
+#: A mapping from tree header letters to labels
+#:
+#: .. seealso:: https://meshb-prev.nlm.nih.gov/treeView
+TREE_HEADER_TO_NAME = {
+    "A": "Anatomy",
+    "B": "Organisms",
+    "C": "Diseases",
+    "D": "Chemicals and Drugs",
+    "E": "Analytical, Diagnostic and Therapeutic Techniques, and Equipment",
+    "F": "Psychiatry and Psychology",
+    "G": "Phenomena and Processes",
+    "H": "Disciplines and Occupations",
+    "I": "Anthropology, Education, Sociology, and Social Phenomena",
+    "J": "Technology, Industry, and Agriculture",
+    "K": "Humanities",
+    "L": "Information Science",
+    "M": "Named Groups",
+    "N": "Health Care",
+    "V": "Publication Characteristics",
+    "Z": "Geographicals",
+}
+
+#: A mapping from tree header letters to term objects
+TREE_HEADERS: dict[str, Reference] = {
+    letter: default_reference(prefix=PREFIX, identifier=letter, name=name)
+    for letter, name in TREE_HEADER_TO_NAME.items()
+}
 
 
 def _get_xml_root(path: Path) -> Element:
@@ -46,13 +77,20 @@ class MeSHGetter(Obo):
     """An ontology representation of the Medical Subject Headings."""
 
     ontology = bioversions_key = PREFIX
+    root_terms = [
+        SUPPLEMENT_PARENT,
+        *TREE_HEADERS.values(),
+    ]
 
     def _get_version(self) -> str | None:
         return NOW_YEAR
 
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
         """Iterate over terms in the ontology."""
-
+        yield Term(reference=SUPPLEMENT_PARENT)
+        for x in TREE_HEADERS.values():
+            yield Term(reference=x)
+        yield from get_terms(version=self._version_or_raise, force=force)
 
 
 def get_tree_to_mesh_id(version: str) -> Mapping[str, str]:
@@ -74,21 +112,21 @@ def get_tree_to_mesh_id(version: str) -> Mapping[str, str]:
     return _inner()
 
 
-def get_terms(version: str, force: bool = False) -> Iterable[Term]:
+def get_terms(version: str, *, force: bool = False) -> Iterable[Term]:
     """Get MeSH OBO terms."""
     mesh_id_to_term: dict[str, Term] = {}
 
-
+    descriptor_records = ensure_mesh_descriptors(version=version, force=force)
     supplemental_records = ensure_mesh_supplemental_records(version=version, force=force)
 
-    for
-        identifier =
-        name =
-        definition =
+    for descriptor_record in itt.chain(descriptor_records, supplemental_records):
+        identifier = descriptor_record["identifier"]
+        name = descriptor_record["name"]
+        definition = descriptor_record.get("scope_note")
 
         xrefs: list[Reference] = []
         synonyms: set[str] = set()
-        for concept in
+        for concept in descriptor_record["concepts"]:
            synonyms.add(concept["name"])
            for term in concept["terms"]:
                synonyms.add(term["name"])
@@ -102,11 +140,23 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]:
            xrefs=xrefs,
        )
 
-    for
-        mesh_id_to_term[
-
-
-
+    for descriptor_record in descriptor_records:
+        term = mesh_id_to_term[descriptor_record["identifier"]]
+        for parent_descriptor_id in descriptor_record["parents"]:
+            term.append_parent(mesh_id_to_term[parent_descriptor_id])
+
+        # This takes care of terms that don't have any parents like
+        # Body Regions (https://meshb.nlm.nih.gov/record/ui?ui=D001829),
+        # which have the tree code A01 and need to point to a made-up
+        # term for "A"
+        for top_level_letter in descriptor_record["top_levels"]:
+            term.append_parent(TREE_HEADERS[top_level_letter])
+
+    # MeSH supplementary records' identifiers start with "C"
+    # and do not have a hierarchy assigned to them
+    for supplemental_record in supplemental_records:
+        term = mesh_id_to_term[supplemental_record["identifier"]]
+        term.append_parent(SUPPLEMENT_PARENT)
 
     return mesh_id_to_term.values()
 
@@ -153,7 +203,7 @@ def ensure_mesh_supplemental_records(version: str, force: bool = False) -> list[
     return _inner()  # type:ignore
 
 
-def get_descriptor_records(element: Element, id_key: str, name_key) -> list[dict[str, Any]]:
+def get_descriptor_records(element: Element, id_key: str, name_key: str) -> list[dict[str, Any]]:
     """Get MeSH descriptor records."""
     logger.info("extract MeSH descriptors, concepts, and terms")
 
@@ -164,7 +214,7 @@ def get_descriptor_records(element: Element, id_key: str, name_key) -> list[dict
     logger.debug(f"got {len(rv)} descriptors")
 
     # cache tree numbers
-    tree_number_to_descriptor_ui = {
+    tree_number_to_descriptor_ui: dict[str, str] = {
        tree_number: descriptor["identifier"]
        for descriptor in rv
        for tree_number in descriptor["tree_numbers"]
@@ -173,26 +223,29 @@
 
     # add in parents to each descriptor based on their tree numbers
     for descriptor in rv:
+        top_levels = set()
        parents_descriptor_uis = set()
        for tree_number in descriptor["tree_numbers"]:
            try:
                parent_tn, _self_tn = tree_number.rsplit(".", 1)
            except ValueError:
-
-
-
-                parent_descriptor_ui = tree_number_to_descriptor_ui.get(parent_tn)
-                if parent_descriptor_ui is not None:
-                    parents_descriptor_uis.add(parent_descriptor_ui)
+                # e.g., this happens for A01 (Body Regions)
+                # https://meshb.nlm.nih.gov/record/ui?ui=D001829
+                top_levels.add(tree_number[0])
            else:
-
+                parent_descriptor_ui = tree_number_to_descriptor_ui.get(parent_tn)
+                if parent_descriptor_ui is not None:
+                    parents_descriptor_uis.add(parent_descriptor_ui)
+                else:
+                    tqdm.write(f"missing tree number: {parent_tn}")
 
-        descriptor["parents"] =
+        descriptor["parents"] = sorted(parents_descriptor_uis)
+        descriptor["top_levels"] = sorted(top_levels)
 
     return rv
 
 
-def get_scope_note(descriptor_record) -> str | None:
+def get_scope_note(descriptor_record: Mapping[str, Any] | list[Mapping[str, Any]]) -> str | None:
     """Get the scope note from the preferred concept in a term's record."""
     if isinstance(descriptor_record, dict):
        # necessary for pre-2023 data
@@ -221,7 +274,7 @@ def get_descriptor_record(
     """
     concepts = get_concept_records(element)
     scope_note = get_scope_note(concepts)
-    rv = {
+    rv: dict[str, Any] = {
        "identifier": element.findtext(id_key),
        "name": element.findtext(name_key),
        "tree_numbers": sorted(
@@ -298,7 +351,7 @@ def get_term_records(element: Element) -> list[Mapping[str, Any]]:
     return [get_term_record(term) for term in element.findall("TermList/Term")]
 
 
-def get_term_record(element) -> Mapping[str, Any]:
+def get_term_record(element: Element) -> Mapping[str, Any]:
     """Get a single MeSH term record."""
     return {
        "term_ui": element.findtext("TermUI"),
@@ -363,7 +416,7 @@ def get_mesh_category_references(
     https://meshb.nlm.nih.gov/treeView
     """
     if version is None:
-        version =
+        version = bioversions.get_version("mesh", strict=True)
     tree_to_mesh = get_tree_to_mesh_id(version=version)
     rv = []
     for i in range(1, 100):