PyPI - pyobo - Versions diffs - 0.10.5__py3-none-any.whl → 0.10.6__py3-none-any.whl - Mend

pyobo 0.10.5__py3-none-any.whl → 0.10.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33)

pyobo/__init__.py +1 -0
pyobo/api/__init__.py +1 -0
pyobo/api/names.py +21 -0
pyobo/gilda_utils.py +54 -47
pyobo/sources/__init__.py +4 -0
pyobo/sources/cgnc.py +2 -1
pyobo/sources/chembl.py +2 -1
pyobo/sources/depmap.py +2 -0
pyobo/sources/drugcentral.py +2 -1
pyobo/sources/geonames.py +229 -0
pyobo/sources/hgnc.py +32 -1
pyobo/sources/mgi.py +3 -1
pyobo/sources/mirbase.py +2 -0
pyobo/sources/mirbase_family.py +5 -2
pyobo/sources/npass.py +1 -1
pyobo/sources/pombase.py +1 -1
pyobo/sources/ror.py +163 -0
pyobo/sources/sgd.py +2 -5
pyobo/sources/slm.py +6 -6
pyobo/sources/umls/get_synonym_types.py +36 -0
pyobo/sources/umls/synonym_types.tsv +243 -242
pyobo/sources/umls/umls.py +3 -7
pyobo/sources/zfin.py +2 -1
pyobo/struct/reference.py +12 -1
pyobo/struct/struct.py +71 -17
pyobo/struct/typedef.py +21 -6
pyobo/version.py +1 -1
{pyobo-0.10.5.dist-info → pyobo-0.10.6.dist-info}/METADATA +2 -2
{pyobo-0.10.5.dist-info → pyobo-0.10.6.dist-info}/RECORD +33 -30
{pyobo-0.10.5.dist-info → pyobo-0.10.6.dist-info}/WHEEL +1 -1
{pyobo-0.10.5.dist-info → pyobo-0.10.6.dist-info}/LICENSE +0 -0
{pyobo-0.10.5.dist-info → pyobo-0.10.6.dist-info}/entry_points.txt +0 -0
{pyobo-0.10.5.dist-info → pyobo-0.10.6.dist-info}/top_level.txt +0 -0

pyobo/__init__.py CHANGED Viewed

@@ -25,6 +25,7 @@ from .api import (  # noqa: F401
     get_name,
     get_name_by_curie,
     get_name_id_mapping,
+    get_obsolete,
     get_primary_curie,
     get_primary_identifier,
     get_properties,

pyobo/api/__init__.py CHANGED Viewed

@@ -27,6 +27,7 @@ from .names import (  # noqa: F401
     get_name,
     get_name_by_curie,
     get_name_id_mapping,
+    get_obsolete,
     get_synonyms,
 )
 from .properties import (  # noqa: F401

pyobo/api/names.py CHANGED Viewed

@@ -24,6 +24,7 @@ __all__ = [
     "get_id_definition_mapping",
     "get_synonyms",
     "get_id_synonyms_mapping",
+    "get_obsolete",
 ]
 logger = logging.getLogger(__name__)
@@ -184,6 +185,26 @@ def get_id_definition_mapping(
     return _get_mapping()
+def get_obsolete(
+    prefix: str,
+    *,
+    force: bool = False,
+    strict: bool = False,
+    version: Optional[str] = None,
+) -> Set[str]:
+    """Get the set of obsolete local unique identifiers."""
+    if version is None:
+        version = get_version(prefix)
+    path = prefix_cache_join(prefix, name="obsolete.tsv", version=version)
+    @cached_collection(path=path, force=force)
+    def _get_obsolete() -> Set[str]:
+        ontology = get_ontology(prefix, force=force, strict=strict, version=version)
+        return ontology.get_obsolete()
+    return set(_get_obsolete())
 @wrap_norm_prefix
 def get_synonyms(prefix: str, identifier: str) -> Optional[List[str]]:
     """Get the synonyms for an entity."""

pyobo/gilda_utils.py CHANGED Viewed

@@ -2,7 +2,6 @@
 """PyOBO's Gilda utilities."""
-import itertools as itt
 import logging
 from typing import Iterable, List, Optional, Tuple, Type, Union
@@ -11,6 +10,7 @@ import gilda.api
 import gilda.term
 from gilda.grounder import Grounder
 from gilda.process import normalize
+from gilda.term import filter_out_duplicates
 from tqdm.auto import tqdm
 from pyobo import (
@@ -18,6 +18,7 @@ from pyobo import (
     get_id_species_mapping,
     get_id_synonyms_mapping,
     get_ids,
+    get_obsolete,
 )
 from pyobo.getters import NoBuild
 from pyobo.utils.io import multidict
@@ -31,32 +32,6 @@ __all__ = [
 logger = logging.getLogger(__name__)
-_STATUSES = {"curated": 1, "name": 2, "synonym": 3, "former_name": 4}
-def filter_out_duplicates(terms: List[gilda.term.Term]) -> List[gilda.term.Term]:
-    """Filter out duplicates."""
-    # TODO import from gilda.term import filter_out_duplicates when it gets moved,
-    #  see https://github.com/indralab/gilda/pull/103
-    logger.debug("filtering %d terms for uniqueness", len(terms))
-    new_terms: List[gilda.term.Term] = [
-        min(terms_group, key=_status_key)
-        for _, terms_group in itt.groupby(sorted(terms, key=_term_key), key=_term_key)
-    ]
-    # Re-sort the terms
-    new_terms = sorted(new_terms, key=lambda x: (x.text, x.db, x.id))
-    logger.debug("got %d unique terms.", len(new_terms))
-    return new_terms
-def _status_key(term: gilda.term.Term) -> int:
-    return _STATUSES[term.status]
-def _term_key(term: gilda.term.Term) -> Tuple[str, str, str]:
-    return term.db, term.id, term.text
 def iter_gilda_prediction_tuples(
     prefix: str,
     relation: str = "skos:exactMatch",
@@ -115,10 +90,12 @@ def normalize_identifier(prefix: str, identifier: str) -> str:
 def get_grounder(
     prefixes: Union[str, Iterable[str]],
+    *,
     unnamed: Optional[Iterable[str]] = None,
     grounder_cls: Optional[Type[Grounder]] = None,
     versions: Union[None, str, Iterable[Union[str, None]]] = None,
     strict: bool = True,
+    skip_obsolete: bool = False,
 ) -> Grounder:
     """Get a Gilda grounder for the given prefix(es)."""
     unnamed = set() if unnamed is None else set(unnamed)
@@ -140,7 +117,11 @@ def get_grounder(
         try:
             p_terms = list(
                 get_gilda_terms(
-                    prefix, identifiers_are_names=prefix in unnamed, version=version, strict=strict
+                    prefix,
+                    identifiers_are_names=prefix in unnamed,
+                    version=version,
+                    strict=strict,
+                    skip_obsolete=skip_obsolete,
                 )
             )
         except NoBuild:
@@ -155,26 +136,50 @@ def get_grounder(
         return grounder_cls(terms_dict)
+def _fast_term(
+    *,
+    text: str,
+    prefix: str,
+    identifier: str,
+    name: str,
+    status: str,
+    organism: Optional[str] = None,
+) -> gilda.term.Term:
+    return gilda.term.Term(
+        norm_text=normalize(text),
+        text=text,
+        db=prefix,
+        id=identifier,
+        entry_name=name,
+        status=status,
+        source=prefix,
+        organism=organism,
+    )
 def get_gilda_terms(
     prefix: str,
+    *,
     identifiers_are_names: bool = False,
     version: Optional[str] = None,
     strict: bool = True,
+    skip_obsolete: bool = False,
 ) -> Iterable[gilda.term.Term]:
     """Get gilda terms for the given namespace."""
     id_to_name = get_id_name_mapping(prefix, version=version, strict=strict)
     id_to_species = get_id_species_mapping(prefix, version=version, strict=strict)
+    obsoletes = get_obsolete(prefix, version=version, strict=strict) if skip_obsolete else set()
     it = tqdm(id_to_name.items(), desc=f"[{prefix}] mapping", unit_scale=True, unit="name")
     for identifier, name in it:
-        yield gilda.term.Term(
-            norm_text=normalize(name),
+        if identifier in obsoletes:
+            continue
+        yield _fast_term(
             text=name,
-            db=prefix,
-            id=identifier,
-            entry_name=name,
+            prefix=prefix,
+            identifier=identifier,
+            name=name,
             status="name",
-            source=prefix,
             organism=id_to_species.get(identifier),
         )
@@ -184,29 +189,31 @@ def get_gilda_terms(
             id_to_synonyms.items(), desc=f"[{prefix}] mapping", unit_scale=True, unit="synonym"
         )
         for identifier, synonyms in it:
+            if identifier in obsoletes:
+                continue
             name = id_to_name[identifier]
             for synonym in synonyms:
-                yield gilda.term.Term(
-                    norm_text=normalize(synonym),
+                if not synonym:
+                    continue
+                yield _fast_term(
                     text=synonym,
-                    db=prefix,
-                    id=identifier,
-                    entry_name=name,
+                    prefix=prefix,
+                    identifier=identifier,
+                    name=name,
                     status="synonym",
-                    source=prefix,
                     organism=id_to_species.get(identifier),
                 )
     if identifiers_are_names:
         it = tqdm(get_ids(prefix), desc=f"[{prefix}] mapping", unit_scale=True, unit="id")
         for identifier in it:
-            yield gilda.term.Term(
-                norm_text=normalize(identifier),
+            if identifier in obsoletes:
+                continue
+            yield _fast_term(
                 text=identifier,
-                db=prefix,
-                id=identifier,
-                entry_name=None,
-                status="identifier",
-                source=prefix,
+                prefix=prefix,
+                identifier=identifier,
+                name=identifier,
+                status="name",
                 organism=id_to_species.get(identifier),
             )

pyobo/sources/__init__.py CHANGED Viewed

@@ -20,6 +20,7 @@ from .drugcentral import DrugCentralGetter
 from .expasy import ExpasyGetter
 from .famplex import FamPlexGetter
 from .flybase import FlyBaseGetter
+from .geonames import GeonamesGetter
 from .gwascentral_phenotype import GWASCentralPhenotypeGetter
 from .gwascentral_study import GWASCentralStudyGetter
 from .hgnc import HGNCGetter
@@ -46,6 +47,7 @@ from .pubchem import PubChemCompoundGetter
 from .reactome import ReactomeGetter
 from .rgd import RGDGetter
 from .rhea import RheaGetter
+from .ror import RORGetter
 from .selventa import SCHEMGetter, SCOMPGetter, SDISGetter, SFAMGetter
 from .sgd import SGDGetter
 from .slm import SLMGetter
@@ -74,6 +76,7 @@ __all__ = [
     "FlyBaseGetter",
     "GWASCentralPhenotypeGetter",
     "GWASCentralStudyGetter",
+    "GeonamesGetter",
     "HGNCGetter",
     "HGNCGroupGetter",
     "ICD10Getter",
@@ -98,6 +101,7 @@ __all__ = [
     "PomBaseGetter",
     "PubChemCompoundGetter",
     "RGDGetter",
+    "RORGetter",
     "ReactomeGetter",
     "RheaGetter",
     "SCHEMGetter",

pyobo/sources/cgnc.py CHANGED Viewed

@@ -8,6 +8,7 @@ from typing import Iterable
 import pandas as pd
 from pyobo.struct import Obo, Reference, Term, from_species
+from pyobo.struct.typedef import exact_match
 from pyobo.utils.path import ensure_df
 __all__ = [
@@ -25,7 +26,7 @@ class CGNCGetter(Obo):
     ontology = PREFIX
     dynamic_version = True
-    typedefs = [from_species]
+    typedefs = [from_species, exact_match]
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
         """Iterate over terms in the ontology."""

pyobo/sources/chembl.py CHANGED Viewed

@@ -12,7 +12,7 @@ from typing import Iterable
 import chembl_downloader
 from pyobo.struct import Obo, Reference, Term
-from pyobo.struct.typedef import has_inchi, has_smiles
+from pyobo.struct.typedef import exact_match, has_inchi, has_smiles
 __all__ = [
     "ChEMBLCompoundGetter",
@@ -45,6 +45,7 @@ class ChEMBLCompoundGetter(Obo):
     ontology = "chembl.compound"
     bioversions_key = "chembl"
+    typedefs = [exact_match]
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
         """Iterate over terms in the ontology."""

pyobo/sources/depmap.py CHANGED Viewed

@@ -8,6 +8,7 @@ import pandas as pd
 import pystow
 from pyobo import Obo, Reference, Term
+from pyobo.struct.typedef import exact_match
 __all__ = [
     "get_obo",
@@ -23,6 +24,7 @@ class DepMapGetter(Obo):
     ontology = bioversions_key = PREFIX
     data_version = VERSION
+    typedefs = [exact_match]
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
         """Iterate over terms in the ontology."""

pyobo/sources/drugcentral.py CHANGED Viewed

@@ -12,7 +12,7 @@ import psycopg2
 from tqdm.auto import tqdm
 from pyobo.struct import Obo, Reference, Synonym, Term
-from pyobo.struct.typedef import has_inchi, has_smiles
+from pyobo.struct.typedef import exact_match, has_inchi, has_smiles
 __all__ = [
     "DrugCentralGetter",
@@ -34,6 +34,7 @@ class DrugCentralGetter(Obo):
     """An ontology representation of the DrugCentral database."""
     ontology = bioversions_key = PREFIX
+    typedefs = [exact_match]
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
         """Iterate over terms in the ontology."""

pyobo/sources/geonames.py ADDED Viewed

@@ -0,0 +1,229 @@
+"""Get terms from geonames."""
+import logging
+from typing import Collection, Iterable, Mapping
+import pandas as pd
+from pystow.utils import read_zipfile_csv
+from tqdm import tqdm
+from pyobo import Obo, Term
+from pyobo.struct import Reference, part_of
+from pyobo.utils.path import ensure_df, ensure_path
+__all__ = ["GeonamesGetter"]
+logger = logging.getLogger(__name__)
+PREFIX = "geonames"
+COUNTRIES_URL = "https://download.geonames.org/export/dump/countryInfo.txt"
+ADMIN1_URL = "https://download.geonames.org/export/dump/admin1CodesASCII.txt"
+ADMIN2_URL = "https://download.geonames.org/export/dump/admin2Codes.txt"
+CITIES_URL = "https://download.geonames.org/export/dump/cities15000.zip"
+class GeonamesGetter(Obo):
+    """An ontology representation of GeoNames."""
+    ontology = PREFIX
+    dynamic_version = True
+    typedefs = [part_of]
+    def iter_terms(self, force: bool = False) -> Iterable[Term]:
+        """Iterate over terms in the ontology."""
+        return get_terms(force=force)
+def get_terms(*, force: bool = False) -> Collection[Term]:
+    """Get terms."""
+    code_to_country = get_code_to_country(force=force)
+    code_to_admin1 = get_code_to_admin1(code_to_country, force=force)
+    code_to_admin2 = get_code_to_admin2(code_to_admin1, force=force)
+    id_to_term = get_cities(
+        code_to_country=code_to_country,
+        code_to_admin1=code_to_admin1,
+        code_to_admin2=code_to_admin2,
+        force=force,
+    )
+    return id_to_term.values()
+def get_code_to_country(*, force: bool = False) -> Mapping[str, Term]:
+    """Get a mapping from country code to country term."""
+    countries_df = ensure_df(
+        PREFIX,
+        url=COUNTRIES_URL,
+        force=force,
+        skiprows=49,
+        keep_default_na=False,  # NA is a country code
+        dtype=str,
+    )
+    logger.info(f"got {len(countries_df.index):,} countries")
+    reorder = ["geonameid", *(c for c in countries_df.columns if c != "geonameid")]
+    countries_df = countries_df[reorder]
+    code_to_country = {}
+    cols = ["geonameid", "Country", "#ISO", "fips", "ISO3"]
+    for identifier, name, code, fips, iso3 in countries_df[cols].values:
+        if pd.isna(code):
+            continue
+        term = Term.from_triple(
+            "geonames", identifier, name if pd.notna(name) else None, type="Instance"
+        )
+        term.append_synonym(code)
+        if name.startswith("The "):
+            term.append_synonym(name.removeprefix("The "))
+        if pd.notna(fips):
+            term.append_synonym(fips)
+        if pd.notna(iso3):
+            term.append_synonym(iso3)
+        term.append_property("code", code)
+        code_to_country[code] = term
+    logger.info(f"got {len(code_to_country):,} country records")
+    return code_to_country
+def get_code_to_admin1(
+    code_to_country: Mapping[str, Term], *, force: bool = False
+) -> Mapping[str, Term]:
+    """Get a mapping from admin1 code to term."""
+    admin1_df = ensure_df(
+        PREFIX,
+        url=ADMIN1_URL,
+        header=None,
+        names=["code", "name", "asciiname", "geonames_id"],
+        dtype=str,
+        force=force,
+    )
+    code_to_admin1 = {}
+    for code, name, asciiname, identifier in admin1_df.values:
+        if pd.isna(identifier) or pd.isna(code):
+            tqdm.write(f"Missing info for  {name} / {asciiname} / {code=} / {identifier=}")
+            continue
+        term = Term.from_triple(
+            "geonames", identifier, name if pd.notna(name) else None, type="Instance"
+        )
+        term.append_property("code", code)
+        code_to_admin1[code] = term
+        country_code = code.split(".")[0]
+        country_term = code_to_country[country_code]
+        term.append_relationship(part_of, country_term)
+    return code_to_admin1
+def get_code_to_admin2(
+    code_to_admin1: Mapping[str, Term], *, force: bool = False
+) -> Mapping[str, Term]:
+    """Get a mapping from admin2 code to term."""
+    admin2_df = ensure_df(
+        PREFIX,
+        url=ADMIN2_URL,
+        header=None,
+        names=["code", "name", "asciiname", "geonames_id"],
+        dtype=str,
+        force=force,
+    )
+    code_to_admin2 = {}
+    for identifier, name, code in admin2_df[["geonames_id", "name", "code"]].values:
+        if pd.isna(identifier) or pd.isna(code):
+            continue
+        term = Term.from_triple(
+            "geonames", identifier, name if pd.notna(name) else None, type="Instance"
+        )
+        term.append_property("code", code)
+        code_to_admin2[code] = term
+        admin1_code = code.rsplit(".", 1)[0]
+        admin1_term = code_to_admin1[admin1_code]
+        term.append_relationship(part_of, admin1_term)
+    return code_to_admin2
+def get_cities(
+    code_to_country,
+    code_to_admin1,
+    code_to_admin2,
+    *,
+    minimum_population: int = 100_000,
+    force: bool = False,
+) -> Mapping[str, Term]:
+    """Get a mapping from city code to term."""
+    columns = [
+        "geonames_id",
+        "name",
+        "asciiname",
+        "synonyms",
+        "latitude",
+        "longitude",
+        "feature_class",
+        "feature_code",
+        "country_code",
+        "cc2",
+        "admin1",
+        "admin2",
+        "admin3",
+        "admin4",
+        "population",
+        "elevation",
+        "dem",
+        "timezone",
+        "date_modified",
+    ]
+    path = ensure_path(PREFIX, url=CITIES_URL, force=force)
+    cities_df = read_zipfile_csv(
+        path=path,
+        inner_path="cities15000.txt",
+        header=None,
+        names=columns,
+        dtype=str,
+    )
+    cities_df = cities_df[cities_df.population.astype(int) > minimum_population]
+    cities_df.synonyms = cities_df.synonyms.str.split(",")
+    terms = {}
+    for term in code_to_country.values():
+        terms[term.identifier] = term
+    cols = ["geonames_id", "name", "synonyms", "country_code", "admin1", "admin2", "feature_code"]
+    for identifier, name, synonyms, country, admin1, admin2, feature_code in cities_df[cols].values:
+        terms[identifier] = term = Term.from_triple(
+            "geonames", identifier, name if pd.notna(name) else None, type="Instance"
+        )
+        term.append_parent(Reference(prefix="geonames.feature", identifier=feature_code))
+        if synonyms and not isinstance(synonyms, float):
+            for synonym in synonyms:
+                if pd.notna(synonym):
+                    term.append_synonym(synonym)
+        if pd.isna(admin1):
+            tqdm.write(f"[geonames:{identifier}] missing admin 1 code for {name} ({country})")
+            continue
+        admin1_full = f"{country}.{admin1}"
+        admin1_term = code_to_admin1.get(admin1_full)
+        if admin1_term is None:
+            logger.info(f"could not find admin1 {admin1_full}")
+            continue
+        terms[admin1_term.identifier] = admin1_term
+        if pd.notna(admin2):
+            admin2_full = f"{country}.{admin1}.{admin2}"
+            admin2_term = code_to_admin2.get(admin2_full)
+            if admin2_term is None or admin1_term is None:
+                pass
+                # print("could not find admin2", admin2_full)
+            else:
+                term.append_relationship(part_of, admin2_term)
+                terms[admin2_term.identifier] = admin2_term
+        else:  # pd.notna(admin1):
+            # If there's no admin 2, just annotate directly onto admin 1
+            term.append_relationship(part_of, admin1_term)
+    return terms
+if __name__ == "__main__":
+    GeonamesGetter().write_default(write_obo=True, force=True)

pyobo/sources/hgnc.py CHANGED Viewed

@@ -27,6 +27,7 @@ from pyobo.struct import (
     orthologous,
     transcribes_to,
 )
+from pyobo.struct.typedef import exact_match
 from pyobo.utils.path import ensure_path, prefix_directory_join
 __all__ = [
@@ -108,6 +109,28 @@ ENCODINGS = {
     "unknown": "GRP",
 }
+SKIP_KEYS = {
+    "date_approved_reserved",
+    "_version_",
+    "uuid",
+    "date_modified",
+    "date_name_changed",
+    "date_symbol_changed",
+    "symbol_report_tag",
+    "location_sortable",
+    "curator_notes",
+    "agr",  # repeat of HGNC ID
+    "gencc",  # repeat of HGNC ID
+    "bioparadigms_slc",  # repeat of symbol
+    "lncrnadb",  # repeat of symbol
+    "gtrnadb",  # repeat of symbol
+    "horde_id",  # repeat of symbol
+    "imgt",  # repeat of symbol
+    "cd",  # symbol
+    "homeodb",  # TODO add to bioregistry, though this is defunct
+    "mamit-trnadb",  # TODO add to bioregistry, though this is defunct
+}
 #: A mapping from HGNC's locus_type annotations to sequence ontology identifiers
 LOCUS_TYPE_TO_SO = {
     # protein-coding gene
@@ -190,6 +213,7 @@ class HGNCGetter(Obo):
         transcribes_to,
         orthologous,
         member_of,
+        exact_match,
     ]
     idspaces = IDSPACES
     synonym_typedefs = [
@@ -330,6 +354,12 @@ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Te
             else:
                 tqdm.write(f"unhandled IUPHAR: {iuphar}")
+        for lrg_info in entry.pop("lsdb", []):
+            if lrg_info.startswith("LRG_"):
+                lrg_curie = lrg_info.split("|")[0]
+                _, lrg_id = lrg_curie.split("_")
+                term.append_xref(Reference(prefix="lrg", identifier=lrg_id))
         for xref_prefix, key in gene_xrefs:
             xref_identifiers = entry.pop(key, None)
             if xref_identifiers is None:
@@ -397,7 +427,8 @@ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Te
         term.set_species(identifier="9606", name="Homo sapiens")
         for key in entry:
-            unhandled_entry_keys[key] += 1
+            if key not in SKIP_KEYS:
+                unhandled_entry_keys[key] += 1
         yield term
     with open(prefix_directory_join(PREFIX, name="unhandled.json"), "w") as file:

pyobo/sources/mgi.py CHANGED Viewed

@@ -9,6 +9,8 @@ from typing import Iterable
 import pandas as pd
 from tqdm.auto import tqdm
+from pyobo.struct.typedef import exact_match
 from ..struct import (
     Obo,
     Reference,
@@ -37,7 +39,7 @@ class MGIGetter(Obo):
     ontology = PREFIX
     dynamic_version = True
-    typedefs = [from_species, has_gene_product, transcribes_to]
+    typedefs = [from_species, has_gene_product, transcribes_to, exact_match]
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
         """Iterate over terms in the ontology."""

pyobo/sources/mirbase.py CHANGED Viewed

@@ -136,6 +136,8 @@ def _process_definitions_lines(
             xref_prefix, xref_identifier, xref_label = map(str.strip, line.split(";"))
             xref_prefix = xref_prefix.lower()
             xref_prefix = xref_mapping.get(xref_prefix, xref_prefix)
+            if xref_prefix == "pictar":
+                continue
             xrefs.append(
                 Reference(prefix=xref_prefix, identifier=xref_identifier, name=xref_label or None)
             )

pyobo/sources/mirbase_family.py CHANGED Viewed

@@ -26,6 +26,7 @@ class MiRBaseFamilyGetter(Obo):
     ontology = PREFIX
     bioversions_key = "mirbase"
+    typedefs = [has_member]
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
         """Iterate over terms in the ontology."""
@@ -40,7 +41,9 @@ def get_obo(force: bool = False) -> Obo:
 def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
     """Get miRBase family terms."""
     df = get_df(version, force=force)
-    for family_id, name, mirna_id, mirna_name in tqdm(df.values, total=len(df.index)):
+    for family_id, name, mirna_id, mirna_name in tqdm(
+        df.values, total=len(df.index), unit_scale=True, desc="miRBase Family"
+    ):
         term = Term(
             reference=Reference(prefix=PREFIX, identifier=family_id, name=name),
         )
@@ -65,4 +68,4 @@ def get_df(version: str, force: bool = False) -> pd.DataFrame:
 if __name__ == "__main__":
-    get_obo().write_default(use_tqdm=True)
+    get_obo().write_default(use_tqdm=True, write_obo=True, force=True)

pyobo 0.10.5__py3-none-any.whl → 0.10.6__py3-none-any.whl

pyobo 0.10.5__py3-none-any.whl → 0.10.6__py3-none-any.whl