pyobo 0.10.4__py3-none-any.whl → 0.10.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/__init__.py +1 -0
- pyobo/api/__init__.py +1 -0
- pyobo/api/names.py +21 -0
- pyobo/api/xrefs.py +8 -5
- pyobo/gilda_utils.py +54 -47
- pyobo/sources/__init__.py +10 -3
- pyobo/sources/cgnc.py +4 -3
- pyobo/sources/chembl.py +5 -3
- pyobo/sources/depmap.py +4 -2
- pyobo/sources/drugbank.py +4 -4
- pyobo/sources/drugcentral.py +9 -5
- pyobo/sources/geonames.py +229 -0
- pyobo/sources/hgnc.py +32 -1
- pyobo/sources/hgncgenefamily.py +1 -1
- pyobo/sources/mgi.py +6 -2
- pyobo/sources/mirbase.py +2 -0
- pyobo/sources/mirbase_family.py +5 -2
- pyobo/sources/mirbase_mature.py +5 -4
- pyobo/sources/npass.py +1 -1
- pyobo/sources/pombase.py +1 -1
- pyobo/sources/ror.py +163 -0
- pyobo/sources/sgd.py +2 -5
- pyobo/sources/slm.py +14 -12
- pyobo/sources/umls/get_synonym_types.py +36 -0
- pyobo/sources/umls/synonym_types.tsv +243 -242
- pyobo/sources/umls/umls.py +3 -7
- pyobo/sources/zfin.py +3 -2
- pyobo/struct/reference.py +13 -2
- pyobo/struct/struct.py +72 -18
- pyobo/struct/typedef.py +32 -6
- pyobo/version.py +1 -1
- {pyobo-0.10.4.dist-info → pyobo-0.10.6.dist-info}/METADATA +9 -9
- {pyobo-0.10.4.dist-info → pyobo-0.10.6.dist-info}/RECORD +37 -34
- {pyobo-0.10.4.dist-info → pyobo-0.10.6.dist-info}/WHEEL +1 -1
- {pyobo-0.10.4.dist-info → pyobo-0.10.6.dist-info}/LICENSE +0 -0
- {pyobo-0.10.4.dist-info → pyobo-0.10.6.dist-info}/entry_points.txt +0 -0
- {pyobo-0.10.4.dist-info → pyobo-0.10.6.dist-info}/top_level.txt +0 -0
pyobo/sources/geonames.py
ADDED
@@ -0,0 +1,229 @@
+"""Get terms from geonames."""
+
+import logging
+from typing import Collection, Iterable, Mapping
+
+import pandas as pd
+from pystow.utils import read_zipfile_csv
+from tqdm import tqdm
+
+from pyobo import Obo, Term
+from pyobo.struct import Reference, part_of
+from pyobo.utils.path import ensure_df, ensure_path
+
+__all__ = ["GeonamesGetter"]
+
+logger = logging.getLogger(__name__)
+
+PREFIX = "geonames"
+COUNTRIES_URL = "https://download.geonames.org/export/dump/countryInfo.txt"
+ADMIN1_URL = "https://download.geonames.org/export/dump/admin1CodesASCII.txt"
+ADMIN2_URL = "https://download.geonames.org/export/dump/admin2Codes.txt"
+CITIES_URL = "https://download.geonames.org/export/dump/cities15000.zip"
+
+
+class GeonamesGetter(Obo):
+    """An ontology representation of GeoNames."""
+
+    ontology = PREFIX
+    dynamic_version = True
+    typedefs = [part_of]
+
+    def iter_terms(self, force: bool = False) -> Iterable[Term]:
+        """Iterate over terms in the ontology."""
+        return get_terms(force=force)
+
+
+def get_terms(*, force: bool = False) -> Collection[Term]:
+    """Get terms."""
+    code_to_country = get_code_to_country(force=force)
+    code_to_admin1 = get_code_to_admin1(code_to_country, force=force)
+    code_to_admin2 = get_code_to_admin2(code_to_admin1, force=force)
+    id_to_term = get_cities(
+        code_to_country=code_to_country,
+        code_to_admin1=code_to_admin1,
+        code_to_admin2=code_to_admin2,
+        force=force,
+    )
+    return id_to_term.values()
+
+
+def get_code_to_country(*, force: bool = False) -> Mapping[str, Term]:
+    """Get a mapping from country code to country term."""
+    countries_df = ensure_df(
+        PREFIX,
+        url=COUNTRIES_URL,
+        force=force,
+        skiprows=49,
+        keep_default_na=False,  # NA is a country code
+        dtype=str,
+    )
+    logger.info(f"got {len(countries_df.index):,} countries")
+    reorder = ["geonameid", *(c for c in countries_df.columns if c != "geonameid")]
+    countries_df = countries_df[reorder]
+    code_to_country = {}
+    cols = ["geonameid", "Country", "#ISO", "fips", "ISO3"]
+    for identifier, name, code, fips, iso3 in countries_df[cols].values:
+        if pd.isna(code):
+            continue
+        term = Term.from_triple(
+            "geonames", identifier, name if pd.notna(name) else None, type="Instance"
+        )
+        term.append_synonym(code)
+        if name.startswith("The "):
+            term.append_synonym(name.removeprefix("The "))
+        if pd.notna(fips):
+            term.append_synonym(fips)
+        if pd.notna(iso3):
+            term.append_synonym(iso3)
+        term.append_property("code", code)
+        code_to_country[code] = term
+    logger.info(f"got {len(code_to_country):,} country records")
+    return code_to_country
+
+
+def get_code_to_admin1(
+    code_to_country: Mapping[str, Term], *, force: bool = False
+) -> Mapping[str, Term]:
+    """Get a mapping from admin1 code to term."""
+    admin1_df = ensure_df(
+        PREFIX,
+        url=ADMIN1_URL,
+        header=None,
+        names=["code", "name", "asciiname", "geonames_id"],
+        dtype=str,
+        force=force,
+    )
+    code_to_admin1 = {}
+    for code, name, asciiname, identifier in admin1_df.values:
+        if pd.isna(identifier) or pd.isna(code):
+            tqdm.write(f"Missing info for {name} / {asciiname} / {code=} / {identifier=}")
+            continue
+
+        term = Term.from_triple(
+            "geonames", identifier, name if pd.notna(name) else None, type="Instance"
+        )
+        term.append_property("code", code)
+        code_to_admin1[code] = term
+
+        country_code = code.split(".")[0]
+        country_term = code_to_country[country_code]
+        term.append_relationship(part_of, country_term)
+    return code_to_admin1
+
+
+def get_code_to_admin2(
+    code_to_admin1: Mapping[str, Term], *, force: bool = False
+) -> Mapping[str, Term]:
+    """Get a mapping from admin2 code to term."""
+    admin2_df = ensure_df(
+        PREFIX,
+        url=ADMIN2_URL,
+        header=None,
+        names=["code", "name", "asciiname", "geonames_id"],
+        dtype=str,
+        force=force,
+    )
+    code_to_admin2 = {}
+    for identifier, name, code in admin2_df[["geonames_id", "name", "code"]].values:
+        if pd.isna(identifier) or pd.isna(code):
+            continue
+        term = Term.from_triple(
+            "geonames", identifier, name if pd.notna(name) else None, type="Instance"
+        )
+        term.append_property("code", code)
+        code_to_admin2[code] = term
+        admin1_code = code.rsplit(".", 1)[0]
+        admin1_term = code_to_admin1[admin1_code]
+        term.append_relationship(part_of, admin1_term)
+    return code_to_admin2
+
+
+def get_cities(
+    code_to_country,
+    code_to_admin1,
+    code_to_admin2,
+    *,
+    minimum_population: int = 100_000,
+    force: bool = False,
+) -> Mapping[str, Term]:
+    """Get a mapping from city code to term."""
+    columns = [
+        "geonames_id",
+        "name",
+        "asciiname",
+        "synonyms",
+        "latitude",
+        "longitude",
+        "feature_class",
+        "feature_code",
+        "country_code",
+        "cc2",
+        "admin1",
+        "admin2",
+        "admin3",
+        "admin4",
+        "population",
+        "elevation",
+        "dem",
+        "timezone",
+        "date_modified",
+    ]
+    path = ensure_path(PREFIX, url=CITIES_URL, force=force)
+    cities_df = read_zipfile_csv(
+        path=path,
+        inner_path="cities15000.txt",
+        header=None,
+        names=columns,
+        dtype=str,
+    )
+
+    cities_df = cities_df[cities_df.population.astype(int) > minimum_population]
+    cities_df.synonyms = cities_df.synonyms.str.split(",")
+
+    terms = {}
+    for term in code_to_country.values():
+        terms[term.identifier] = term
+
+    cols = ["geonames_id", "name", "synonyms", "country_code", "admin1", "admin2", "feature_code"]
+    for identifier, name, synonyms, country, admin1, admin2, feature_code in cities_df[cols].values:
+        terms[identifier] = term = Term.from_triple(
+            "geonames", identifier, name if pd.notna(name) else None, type="Instance"
+        )
+        term.append_parent(Reference(prefix="geonames.feature", identifier=feature_code))
+        if synonyms and not isinstance(synonyms, float):
+            for synonym in synonyms:
+                if pd.notna(synonym):
+                    term.append_synonym(synonym)
+
+        if pd.isna(admin1):
+            tqdm.write(f"[geonames:{identifier}] missing admin 1 code for {name} ({country})")
+            continue
+
+        admin1_full = f"{country}.{admin1}"
+        admin1_term = code_to_admin1.get(admin1_full)
+        if admin1_term is None:
+            logger.info(f"could not find admin1 {admin1_full}")
+            continue
+
+        terms[admin1_term.identifier] = admin1_term
+
+        if pd.notna(admin2):
+            admin2_full = f"{country}.{admin1}.{admin2}"
+            admin2_term = code_to_admin2.get(admin2_full)
+            if admin2_term is None or admin1_term is None:
+                pass
+                # print("could not find admin2", admin2_full)
+            else:
+                term.append_relationship(part_of, admin2_term)
+                terms[admin2_term.identifier] = admin2_term
+
+        else:  # pd.notna(admin1):
+            # If there's no admin 2, just annotate directly onto admin 1
+            term.append_relationship(part_of, admin1_term)
+
+    return terms
+
+
+if __name__ == "__main__":
+    GeonamesGetter().write_default(write_obo=True, force=True)

pyobo/sources/hgnc.py
CHANGED
@@ -27,6 +27,7 @@ from pyobo.struct import (
     orthologous,
     transcribes_to,
 )
+from pyobo.struct.typedef import exact_match
 from pyobo.utils.path import ensure_path, prefix_directory_join
 
 __all__ = [
@@ -108,6 +109,28 @@ ENCODINGS = {
     "unknown": "GRP",
 }
 
+SKIP_KEYS = {
+    "date_approved_reserved",
+    "_version_",
+    "uuid",
+    "date_modified",
+    "date_name_changed",
+    "date_symbol_changed",
+    "symbol_report_tag",
+    "location_sortable",
+    "curator_notes",
+    "agr",  # repeat of HGNC ID
+    "gencc",  # repeat of HGNC ID
+    "bioparadigms_slc",  # repeat of symbol
+    "lncrnadb",  # repeat of symbol
+    "gtrnadb",  # repeat of symbol
+    "horde_id",  # repeat of symbol
+    "imgt",  # repeat of symbol
+    "cd",  # symbol
+    "homeodb",  # TODO add to bioregistry, though this is defunct
+    "mamit-trnadb",  # TODO add to bioregistry, though this is defunct
+}
+
 #: A mapping from HGNC's locus_type annotations to sequence ontology identifiers
 LOCUS_TYPE_TO_SO = {
     # protein-coding gene
@@ -190,6 +213,7 @@ class HGNCGetter(Obo):
         transcribes_to,
         orthologous,
         member_of,
+        exact_match,
     ]
     idspaces = IDSPACES
     synonym_typedefs = [
@@ -330,6 +354,12 @@ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Term]:
         else:
             tqdm.write(f"unhandled IUPHAR: {iuphar}")
 
+        for lrg_info in entry.pop("lsdb", []):
+            if lrg_info.startswith("LRG_"):
+                lrg_curie = lrg_info.split("|")[0]
+                _, lrg_id = lrg_curie.split("_")
+                term.append_xref(Reference(prefix="lrg", identifier=lrg_id))
+
         for xref_prefix, key in gene_xrefs:
             xref_identifiers = entry.pop(key, None)
             if xref_identifiers is None:
@@ -397,7 +427,8 @@ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Term]:
         term.set_species(identifier="9606", name="Homo sapiens")
 
         for key in entry:
-            unhandled_entry_keys[key] += 1
+            if key not in SKIP_KEYS:
+                unhandled_entry_keys[key] += 1
         yield term
 
     with open(prefix_directory_join(PREFIX, name="unhandled.json"), "w") as file:

pyobo/sources/hgncgenefamily.py
CHANGED
pyobo/sources/mgi.py
CHANGED
@@ -9,6 +9,8 @@ from typing import Iterable
 import pandas as pd
 from tqdm.auto import tqdm
 
+from pyobo.struct.typedef import exact_match
+
 from ..struct import (
     Obo,
     Reference,
@@ -37,7 +39,7 @@ class MGIGetter(Obo):
 
     ontology = PREFIX
     dynamic_version = True
-    typedefs = [from_species, has_gene_product, transcribes_to]
+    typedefs = [from_species, has_gene_product, transcribes_to, exact_match]
 
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
         """Iterate over terms in the ontology."""
@@ -161,7 +163,9 @@ def get_terms(force: bool = False) -> Iterable[Term]:
         for synonym in mgi_to_synonyms[identifier]:
             term.append_synonym(Synonym(name=synonym))
         if identifier in mgi_to_entrez_id:
-            term.append_xref(Reference(prefix="ncbigene", identifier=mgi_to_entrez_id[identifier]))
+            term.append_exact_match(
+                Reference(prefix="ncbigene", identifier=mgi_to_entrez_id[identifier])
+            )
         for ensembl_id in mgi_to_ensemble_accession_ids[identifier]:
             term.append_xref(Reference(prefix="ensembl", identifier=ensembl_id))
         for ensembl_id in mgi_to_ensemble_transcript_ids[identifier]:

pyobo/sources/mirbase.py
CHANGED
@@ -136,6 +136,8 @@ def _process_definitions_lines(
         xref_prefix, xref_identifier, xref_label = map(str.strip, line.split(";"))
         xref_prefix = xref_prefix.lower()
         xref_prefix = xref_mapping.get(xref_prefix, xref_prefix)
+        if xref_prefix == "pictar":
+            continue
         xrefs.append(
             Reference(prefix=xref_prefix, identifier=xref_identifier, name=xref_label or None)
         )

pyobo/sources/mirbase_family.py
CHANGED
@@ -26,6 +26,7 @@ class MiRBaseFamilyGetter(Obo):
 
     ontology = PREFIX
     bioversions_key = "mirbase"
+    typedefs = [has_member]
 
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
         """Iterate over terms in the ontology."""
@@ -40,7 +41,9 @@ def get_obo(force: bool = False) -> Obo:
 def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
     """Get miRBase family terms."""
     df = get_df(version, force=force)
-    for family_id, name, mirna_id, mirna_name in tqdm(df.values):
+    for family_id, name, mirna_id, mirna_name in tqdm(
+        df.values, total=len(df.index), unit_scale=True, desc="miRBase Family"
+    ):
         term = Term(
             reference=Reference(prefix=PREFIX, identifier=family_id, name=name),
         )
@@ -65,4 +68,4 @@ def get_df(version: str, force: bool = False) -> pd.DataFrame:
 
 
 if __name__ == "__main__":
-    get_obo().write_default(use_tqdm=True)
+    get_obo().write_default(use_tqdm=True, write_obo=True, force=True)

pyobo/sources/mirbase_mature.py
CHANGED
@@ -39,15 +39,16 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
     for _, name, previous_name, mirbase_mature_id in tqdm(
         df.values, total=len(df.index), unit_scale=True
     ):
+        synonyms = []
+        if pd.notna(previous_name):
+            synonyms.append(Synonym(name=previous_name))
         yield Term(
             reference=Reference(
                 prefix=PREFIX, identifier=mirbase_mature_id, name=name if pd.notna(name) else None
             ),
-            synonyms=[
-                Synonym(name=previous_name),
-            ],
+            synonyms=synonyms,
         )
 
 
 if __name__ == "__main__":
-    get_obo().write_default(use_tqdm=True)
+    get_obo().write_default(write_obo=True, write_obograph=True, use_tqdm=True)

pyobo/sources/npass.py
CHANGED
@@ -77,7 +77,7 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
             logger.debug("multiple cids for %s: %s", identifier, pubchem_compound_ids)
         for pubchem_compound_id in pubchem_compound_ids:
             term.append_xref(
-                Reference(prefix="pubchem.compound", identifier=pubchem_compound_id)
+                Reference(prefix="pubchem.compound", identifier=pubchem_compound_id.strip())
             )
 
     for synonym in [iupac]:

pyobo/sources/pombase.py
CHANGED
@@ -29,7 +29,7 @@ class PomBaseGetter(Obo):
     """An ontology representation of PomBase's fission yeast gene nomenclature."""
 
     ontology = bioversions_key = PREFIX
-    typedefs = [from_species, has_gene_product]
+    typedefs = [from_species, has_gene_product, orthologous]
 
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
         """Iterate over terms in the ontology."""

pyobo/sources/ror.py
ADDED
@@ -0,0 +1,163 @@
+"""Convert the Research Organization Registry (ROR) into an ontology."""
+
+import json
+import zipfile
+from typing import Iterable
+
+import bioregistry
+import zenodo_client
+from tqdm.auto import tqdm
+
+from pyobo.struct import Obo, Reference, Term, TypeDef
+from pyobo.struct.struct import acronym
+
+PREFIX = "ror"
+ROR_ZENODO_RECORD_ID = "10086202"
+
+# Constants
+ORG_CLASS = Reference(prefix="OBI", identifier="0000245")
+LOCATED_IN = Reference(prefix="RO", identifier="0001025")
+PART_OF = Reference(prefix="BFO", identifier="0000050")
+HAS_PART = Reference(prefix="BFO", identifier="0000051")
+SUCCESSOR = Reference(prefix="BFO", identifier="0000063")
+PREDECESSOR = Reference(prefix="BFO", identifier="0000062")
+
+RMAP = {
+    "Related": TypeDef.from_triple("rdfs", "seeAlso"),
+    "Child": TypeDef(HAS_PART),
+    "Parent": TypeDef(PART_OF),
+    "Predecessor": TypeDef(PREDECESSOR),
+    "Successor": TypeDef(SUCCESSOR),
+    "Located in": TypeDef(LOCATED_IN),
+}
+NAME_REMAPPING = {
+    "'s-Hertogenbosch": "Den Bosch",  # SMH Netherlands, why u gotta be like this
+    "'s Heeren Loo": "s Heeren Loo",
+    "'s-Heerenberg": "s-Heerenberg",
+    "Institut Virion\\Serion": "Institut Virion/Serion",
+    "Hematology\\Oncology Clinic": "Hematology/Oncology Clinic",
+}
+
+
+class RORGetter(Obo):
+    """An ontology representation of the ROR."""
+
+    ontology = bioregistry_key = PREFIX
+    typedefs = list(RMAP.values())
+    synonym_typedefs = [acronym]
+    idspaces = {
+        "ror": "https://ror.org/",
+        "geonames": "https://www.geonames.org/",
+        "envo": "http://purl.obolibrary.org/obo/ENVO_",
+        "bfo": "http://purl.obolibrary.org/obo/BFO_",
+        "ro": "http://purl.obolibrary.org/obo/RO_",
+        "obi": "http://purl.obolibrary.org/obo/OBI_",
+        "omo": "http://purl.obolibrary.org/obo/OMO_",
+        "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
+    }
+
+    def __post_init__(self):  # noqa: D105
+        self.data_version, _url, _path = _get_info()
+        super().__post_init__()
+
+    def iter_terms(self, force: bool = False) -> Iterable[Term]:
+        """Iterate over terms in the ontology."""
+        return iterate_ror_terms(force=force)
+
+
+def iterate_ror_terms(*, force: bool = False) -> Iterable[Term]:
+    """Iterate over terms in ROR."""
+    version, source_uri, records = get_latest(force=force)
+    unhandled_xref_prefixes = set()
+    for record in tqdm(records, unit_scale=True, unit="record", desc=PREFIX):
+        identifier = record["id"].removeprefix("https://ror.org/")
+        name = record["name"]
+        name = NAME_REMAPPING.get(name, name)
+
+        term = Term(
+            reference=Reference(prefix=PREFIX, identifier=identifier, name=name), type="Instance"
+        )
+        term.append_parent(ORG_CLASS)
+
+        if name.startswith("The "):
+            term.append_synonym(name.removeprefix("The "))
+
+        for relationship in record.get("relationships", []):
+            target_id = relationship["id"].removeprefix("https://ror.org/")
+            term.append_relationship(
+                RMAP[relationship["type"]], Reference(prefix=PREFIX, identifier=target_id)
+            )
+
+        term.is_obsolete = record.get("status") != "active"
+
+        for address in record.get("addresses", []):
+            city = address.get("geonames_city")
+            if not city:
+                continue
+            term.append_relationship(
+                RMAP["Located in"], Reference(prefix="geonames", identifier=str(city["id"]))
+            )
+
+        for label in record.get("labels", []):
+            label = label["label"]  # there's a language available in this dict too
+            term.append_synonym(label)
+            if label.startswith("The "):
+                term.append_synonym(label.removeprefix("The "))
+
+        for synonym in record.get("aliases", []):
+            term.append_synonym(synonym)
+            if synonym.startswith("The "):
+                term.append_synonym(synonym.removeprefix("The "))
+
+        for acronym_synonym in record.get("acronyms", []):
+            term.append_synonym(acronym_synonym, type=acronym)
+
+        for prefix, xref_data in record.get("external_ids", {}).items():
+            if prefix == "OrgRef":
+                # OrgRef refers to wikipedia page id, see
+                # https://stackoverflow.com/questions/6168020/what-is-wikipedia-pageid-how-to-change-it-into-real-page-url
+                continue
+            norm_prefix = bioregistry.normalize_prefix(prefix)
+            if norm_prefix is None:
+                if prefix not in unhandled_xref_prefixes:
+                    tqdm.write(f"Unhandled prefix: {prefix} in {name} ({term.curie}). Values:")
+                    for xref_id in xref_data["all"]:
+                        tqdm.write(f"- {xref_id}")
+                    unhandled_xref_prefixes.add(prefix)
+                continue
+
+            identifiers = xref_data["all"]
+            if isinstance(identifiers, str):
+                identifiers = [identifiers]
+            for xref_id in identifiers:
+                term.append_xref(Reference(prefix=norm_prefix, identifier=xref_id.replace(" ", "")))
+
+        yield term
+
+
+def _get_info(*, force: bool = False):
+    client = zenodo_client.Zenodo()
+    latest_record_id = client.get_latest_record(ROR_ZENODO_RECORD_ID)
+    response = client.get_record(latest_record_id)
+    response_json = response.json()
+    version = response_json["metadata"]["version"].lstrip("v")
+    file_record = response_json["files"][0]
+    name = file_record["key"]
+    url = file_record["links"]["self"]
+    path = client.download(latest_record_id, name=name, force=force)
+    return version, url, path
+
+
+def get_latest(*, force: bool = False):
+    """Get the latest ROR metadata and records."""
+    version, url, path = _get_info(force=force)
+    with zipfile.ZipFile(path) as zf:
+        for zip_info in zf.filelist:
+            if zip_info.filename.endswith(".json"):
+                with zf.open(zip_info) as file:
+                    return version, url, json.load(file)
+    raise FileNotFoundError
+
+
+if __name__ == "__main__":
+    RORGetter().write_default(write_obo=True, force=True)

pyobo/sources/sgd.py
CHANGED
@@ -5,7 +5,7 @@
 from typing import Iterable
 from urllib.parse import unquote_plus
 
-from ..struct import Obo, Reference, Synonym, SynonymTypeDef, Term, from_species
+from ..struct import Obo, Reference, Synonym, Term, from_species
 from ..utils.path import ensure_tar_df
 
 __all__ = [
@@ -21,15 +21,12 @@ URL = (
 )
 INNER_PATH = "S288C_reference_genome_R64-2-1_20150113/saccharomyces_cerevisiae_R64-2-1_20150113.gff"
 
-alias_type = SynonymTypeDef.from_text("alias")
-
 
 class SGDGetter(Obo):
     """An ontology representation of SGD's yeast gene nomenclature."""
 
     bioversions_key = ontology = PREFIX
     typedefs = [from_species]
-    synonym_typedefs = [alias_type]
 
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
         """Iterate over terms for SGD."""
@@ -68,7 +65,7 @@ def get_terms(ontology: Obo, force: bool = False) -> Iterable[Term]:
         aliases = d.get("Alias")
         if aliases:
             for alias in aliases.split(","):
-                synonyms.append(Synonym(name=unquote_plus(alias), type=alias_type))
+                synonyms.append(Synonym(name=unquote_plus(alias)))
 
         term = Term(
             reference=Reference(prefix=PREFIX, identifier=identifier, name=name),

pyobo/sources/slm.py
CHANGED
@@ -7,7 +7,9 @@ from typing import Iterable
 import pandas as pd
 from tqdm.auto import tqdm
 
-from pyobo import Obo, SynonymTypeDef, Term
+from pyobo import Obo, Reference, Term
+from pyobo.struct.struct import abbreviation as abbreviation_typedef
+from pyobo.struct.typedef import exact_match, has_inchi, has_smiles
 from pyobo.utils.path import ensure_df
 
 __all__ = [
@@ -37,14 +39,13 @@ COLUMNS = [
     "PMID",
 ]
 
-abreviation_type = SynonymTypeDef.from_text("abbreviation")
-
 
 class SLMGetter(Obo):
     """An ontology representation of SwissLipid's lipid nomenclature."""
 
     ontology = bioversions_key = PREFIX
-    synonym_typedefs = [abreviation_type]
+    typedefs = [exact_match]
+    synonym_typedefs = [abbreviation_typedef]
 
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
         """Iterate over terms in the ontology."""
@@ -90,28 +91,29 @@ def iter_terms(version: str, force: bool = False):
         else:
             raise ValueError(identifier)
         term = Term.from_triple(PREFIX, identifier, name)
-
+        if pd.notna(level):
+            term.append_property("level", level)
         if pd.notna(abbreviation):
-            term.append_synonym(abbreviation, type=abreviation_type)
+            term.append_synonym(abbreviation, type=abbreviation_typedef)
         if pd.notna(synonyms):
             for synonym in synonyms.split("|"):
                 term.append_synonym(synonym.strip())
         if pd.notna(smiles):
-            term.append_property("smiles", smiles)
+            term.append_property(has_smiles, smiles)
         if pd.notna(inchi) and inchi != "InChI=none":
             if inchi.startswith("InChI="):
                 inchi = inchi[len("InChI=") :]
-            term.append_property("inchi", inchi)
+            term.append_property(has_inchi, inchi)
         if pd.notna(inchikey):
             if inchikey.startswith("InChIKey="):
                 inchikey = inchikey[len("InChIKey=") :]
-            term.append_xref(Reference(prefix="inchikey", identifier=inchikey))
+            term.append_exact_match(Reference(prefix="inchikey", identifier=inchikey))
         if pd.notna(chebi_id):
-            term.append_xref(("chebi", chebi_id))
+            term.append_exact_match(("chebi", chebi_id))
        if pd.notna(lipidmaps_id):
-            term.append_xref(("lipidmaps", lipidmaps_id))
+            term.append_exact_match(("lipidmaps", lipidmaps_id))
         if pd.notna(hmdb_id):
-            term.append_xref(("hmdb", hmdb_id))
+            term.append_exact_match(("hmdb", hmdb_id))
         if pd.notna(pmids):
             for pmid in pmids.split("|"):
                 term.append_provenance(("pubmed", pmid))
