PyPI - pyobo - Versions diffs - 0.11.2__py3-none-any.whl → 0.12.0__py3-none-any.whl - Mend

pyobo 0.11.2__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (227)

pyobo/.DS_Store +0 -0
pyobo/__init__.py +95 -20
pyobo/__main__.py +0 -0
pyobo/api/__init__.py +81 -10
pyobo/api/alts.py +52 -42
pyobo/api/combine.py +39 -0
pyobo/api/edges.py +68 -0
pyobo/api/hierarchy.py +231 -203
pyobo/api/metadata.py +14 -19
pyobo/api/names.py +207 -127
pyobo/api/properties.py +117 -113
pyobo/api/relations.py +68 -94
pyobo/api/species.py +24 -21
pyobo/api/typedefs.py +11 -11
pyobo/api/utils.py +66 -13
pyobo/api/xrefs.py +108 -114
pyobo/cli/__init__.py +0 -0
pyobo/cli/cli.py +35 -50
pyobo/cli/database.py +183 -161
pyobo/{xrefdb/xrefs_pipeline.py → cli/database_utils.py} +54 -73
pyobo/cli/lookup.py +163 -195
pyobo/cli/utils.py +19 -6
pyobo/constants.py +102 -3
pyobo/getters.py +196 -118
pyobo/gilda_utils.py +79 -200
pyobo/identifier_utils/__init__.py +41 -0
pyobo/identifier_utils/api.py +296 -0
pyobo/identifier_utils/model.py +130 -0
pyobo/identifier_utils/preprocessing.json +812 -0
pyobo/identifier_utils/preprocessing.py +61 -0
pyobo/identifier_utils/relations/__init__.py +8 -0
pyobo/identifier_utils/relations/api.py +162 -0
pyobo/identifier_utils/relations/data.json +5824 -0
pyobo/identifier_utils/relations/data_owl.json +57 -0
pyobo/identifier_utils/relations/data_rdf.json +1 -0
pyobo/identifier_utils/relations/data_rdfs.json +7 -0
pyobo/mocks.py +9 -6
pyobo/ner/__init__.py +9 -0
pyobo/ner/api.py +72 -0
pyobo/ner/normalizer.py +33 -0
pyobo/obographs.py +43 -39
pyobo/plugins.py +5 -4
pyobo/py.typed +0 -0
pyobo/reader.py +1358 -395
pyobo/reader_utils.py +155 -0
pyobo/resource_utils.py +42 -22
pyobo/resources/__init__.py +0 -0
pyobo/resources/goc.py +75 -0
pyobo/resources/goc.tsv +188 -0
pyobo/resources/ncbitaxon.py +4 -5
pyobo/resources/ncbitaxon.tsv.gz +0 -0
pyobo/resources/ro.py +3 -2
pyobo/resources/ro.tsv +0 -0
pyobo/resources/so.py +0 -0
pyobo/resources/so.tsv +0 -0
pyobo/sources/README.md +12 -8
pyobo/sources/__init__.py +52 -29
pyobo/sources/agrovoc.py +0 -0
pyobo/sources/antibodyregistry.py +11 -12
pyobo/sources/bigg/__init__.py +13 -0
pyobo/sources/bigg/bigg_compartment.py +81 -0
pyobo/sources/bigg/bigg_metabolite.py +229 -0
pyobo/sources/bigg/bigg_model.py +46 -0
pyobo/sources/bigg/bigg_reaction.py +77 -0
pyobo/sources/biogrid.py +1 -2
pyobo/sources/ccle.py +7 -12
pyobo/sources/cgnc.py +0 -5
pyobo/sources/chebi.py +1 -1
pyobo/sources/chembl/__init__.py +9 -0
pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
pyobo/sources/chembl/chembl_target.py +160 -0
pyobo/sources/civic_gene.py +55 -15
pyobo/sources/clinicaltrials.py +160 -0
pyobo/sources/complexportal.py +24 -24
pyobo/sources/conso.py +14 -22
pyobo/sources/cpt.py +0 -0
pyobo/sources/credit.py +1 -9
pyobo/sources/cvx.py +27 -5
pyobo/sources/depmap.py +9 -12
pyobo/sources/dictybase_gene.py +2 -7
pyobo/sources/drugbank/__init__.py +9 -0
pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
pyobo/sources/drugcentral.py +17 -13
pyobo/sources/expasy.py +31 -34
pyobo/sources/famplex.py +13 -18
pyobo/sources/flybase.py +3 -8
pyobo/sources/gard.py +62 -0
pyobo/sources/geonames/__init__.py +9 -0
pyobo/sources/geonames/features.py +28 -0
pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
pyobo/sources/geonames/utils.py +115 -0
pyobo/sources/gmt_utils.py +6 -7
pyobo/sources/go.py +20 -13
pyobo/sources/gtdb.py +154 -0
pyobo/sources/gwascentral/__init__.py +9 -0
pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
pyobo/sources/hgnc/__init__.py +9 -0
pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
pyobo/sources/icd/__init__.py +9 -0
pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
pyobo/sources/icd/icd11.py +148 -0
pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
pyobo/sources/interpro.py +4 -9
pyobo/sources/itis.py +0 -5
pyobo/sources/kegg/__init__.py +0 -0
pyobo/sources/kegg/api.py +16 -38
pyobo/sources/kegg/genes.py +9 -20
pyobo/sources/kegg/genome.py +1 -7
pyobo/sources/kegg/pathway.py +9 -21
pyobo/sources/mesh.py +58 -24
pyobo/sources/mgi.py +3 -10
pyobo/sources/mirbase/__init__.py +11 -0
pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
pyobo/sources/msigdb.py +74 -39
pyobo/sources/ncbi/__init__.py +9 -0
pyobo/sources/ncbi/ncbi_gc.py +162 -0
pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
pyobo/sources/nih_reporter.py +60 -0
pyobo/sources/nlm/__init__.py +9 -0
pyobo/sources/nlm/nlm_catalog.py +48 -0
pyobo/sources/nlm/nlm_publisher.py +36 -0
pyobo/sources/nlm/utils.py +116 -0
pyobo/sources/npass.py +6 -8
pyobo/sources/omim_ps.py +10 -3
pyobo/sources/pathbank.py +4 -8
pyobo/sources/pfam/__init__.py +9 -0
pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
pyobo/sources/pharmgkb/__init__.py +15 -0
pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
pyobo/sources/pharmgkb/utils.py +86 -0
pyobo/sources/pid.py +1 -6
pyobo/sources/pombase.py +6 -10
pyobo/sources/pubchem.py +4 -9
pyobo/sources/reactome.py +5 -11
pyobo/sources/rgd.py +11 -16
pyobo/sources/rhea.py +37 -36
pyobo/sources/ror.py +69 -42
pyobo/sources/selventa/__init__.py +0 -0
pyobo/sources/selventa/schem.py +4 -7
pyobo/sources/selventa/scomp.py +1 -6
pyobo/sources/selventa/sdis.py +4 -7
pyobo/sources/selventa/sfam.py +1 -6
pyobo/sources/sgd.py +6 -11
pyobo/sources/signor/__init__.py +7 -0
pyobo/sources/signor/download.py +41 -0
pyobo/sources/signor/signor_complexes.py +105 -0
pyobo/sources/slm.py +12 -15
pyobo/sources/umls/__init__.py +7 -1
pyobo/sources/umls/__main__.py +0 -0
pyobo/sources/umls/get_synonym_types.py +20 -4
pyobo/sources/umls/sty.py +57 -0
pyobo/sources/umls/synonym_types.tsv +1 -1
pyobo/sources/umls/umls.py +18 -22
pyobo/sources/unimod.py +46 -0
pyobo/sources/uniprot/__init__.py +1 -1
pyobo/sources/uniprot/uniprot.py +40 -32
pyobo/sources/uniprot/uniprot_ptm.py +4 -34
pyobo/sources/utils.py +3 -2
pyobo/sources/wikipathways.py +7 -10
pyobo/sources/zfin.py +5 -10
pyobo/ssg/__init__.py +12 -16
pyobo/ssg/base.html +0 -0
pyobo/ssg/index.html +26 -13
pyobo/ssg/term.html +12 -2
pyobo/ssg/typedef.html +0 -0
pyobo/struct/__init__.py +54 -8
pyobo/struct/functional/__init__.py +1 -0
pyobo/struct/functional/dsl.py +2572 -0
pyobo/struct/functional/macros.py +423 -0
pyobo/struct/functional/obo_to_functional.py +385 -0
pyobo/struct/functional/ontology.py +270 -0
pyobo/struct/functional/utils.py +112 -0
pyobo/struct/reference.py +331 -136
pyobo/struct/struct.py +1413 -643
pyobo/struct/struct_utils.py +1078 -0
pyobo/struct/typedef.py +162 -210
pyobo/struct/utils.py +12 -5
pyobo/struct/vocabulary.py +138 -0
pyobo/utils/__init__.py +0 -0
pyobo/utils/cache.py +13 -11
pyobo/utils/io.py +17 -31
pyobo/utils/iter.py +5 -5
pyobo/utils/misc.py +41 -53
pyobo/utils/ndex_utils.py +0 -0
pyobo/utils/path.py +76 -70
pyobo/version.py +3 -3
{pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/METADATA +228 -229
pyobo-0.12.0.dist-info/RECORD +202 -0
pyobo-0.12.0.dist-info/WHEEL +4 -0
{pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/entry_points.txt +1 -0
pyobo-0.12.0.dist-info/licenses/LICENSE +21 -0
pyobo/aws.py +0 -162
pyobo/cli/aws.py +0 -47
pyobo/identifier_utils.py +0 -142
pyobo/normalizer.py +0 -232
pyobo/registries/__init__.py +0 -16
pyobo/registries/metaregistry.json +0 -507
pyobo/registries/metaregistry.py +0 -135
pyobo/sources/icd11.py +0 -105
pyobo/xrefdb/__init__.py +0 -1
pyobo/xrefdb/canonicalizer.py +0 -214
pyobo/xrefdb/priority.py +0 -59
pyobo/xrefdb/sources/__init__.py +0 -60
pyobo/xrefdb/sources/biomappings.py +0 -36
pyobo/xrefdb/sources/cbms2019.py +0 -91
pyobo/xrefdb/sources/chembl.py +0 -83
pyobo/xrefdb/sources/compath.py +0 -82
pyobo/xrefdb/sources/famplex.py +0 -64
pyobo/xrefdb/sources/gilda.py +0 -50
pyobo/xrefdb/sources/intact.py +0 -113
pyobo/xrefdb/sources/ncit.py +0 -133
pyobo/xrefdb/sources/pubchem.py +0 -27
pyobo/xrefdb/sources/wikidata.py +0 -116
pyobo-0.11.2.dist-info/RECORD +0 -157
pyobo-0.11.2.dist-info/WHEEL +0 -5
pyobo-0.11.2.dist-info/top_level.txt +0 -1

pyobo/sources/complexportal.py CHANGED Viewed

@@ -7,7 +7,16 @@ import pandas as pd
 from tqdm.auto import tqdm
 from pyobo.resources.ncbitaxon import get_ncbitaxon_name
-from pyobo.struct import Obo, Reference, Synonym, Term, from_species, has_part
+from pyobo.struct import (
+    Obo,
+    Reference,
+    Synonym,
+    Term,
+    _parse_str_or_curie_or_uri,
+    from_species,
+    has_citation,
+    has_part,
+)
 from pyobo.utils.path import ensure_df
 __all__ = [
@@ -96,13 +105,14 @@ def _parse_xrefs(s) -> list[tuple[Reference, str]]:
         xref = xref.replace("protein ontology:PR_", "PR:")
         xref = xref.replace("rhea:rhea ", "rhea:")
         xref = xref.replace("rhea:Rhea ", "rhea:")
+        xref = xref.replace("rhea:RHEA ", "rhea:")
         xref = xref.replace("rhea:RHEA:rhea", "rhea:")
         xref = xref.replace("rhea:RHEA: ", "rhea:")
         xref = xref.replace("rhea:RHEA:rhea ", "rhea:")
         xref = xref.replace("intenz:RHEA:", "rhea:")
-        xref = xref.replace("eccode::", "eccode:")
-        xref = xref.replace("eccode:EC:", "eccode:")
-        xref = xref.replace("intenz:EC:", "eccode:")
+        xref = xref.replace("eccode::", "ec:")
+        xref = xref.replace("eccode:EC:", "ec:")
+        xref = xref.replace("intenz:EC:", "ec:")
         xref = xref.replace("eccode:RHEA:", "rhea:")
         xref = xref.replace("efo:MONDO:", "MONDO:")
         xref = xref.replace("omim:MIM:", "omim:")
@@ -125,7 +135,7 @@ def _parse_xrefs(s) -> list[tuple[Reference, str]]:
             xref_curie = _clean_intenz(xref_curie)
         try:
-            reference = Reference.from_curie(xref_curie)
+            reference = _parse_str_or_curie_or_uri(xref_curie)
         except ValueError:
             logger.warning("can not parse CURIE: %s", xref_curie)
             continue
@@ -146,18 +156,13 @@ class ComplexPortalGetter(Obo):
     """An ontology representation of the Complex Portal."""
     bioversions_key = ontology = PREFIX
-    typedefs = [from_species, has_part]
+    typedefs = [from_species, has_part, has_citation]
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
         """Iterate over terms in the ontology."""
         return get_terms(version=self._version_or_raise)
-def get_obo(force: bool = False) -> Obo:
-    """Get the ComplexPortal OBO."""
-    return ComplexPortalGetter(force=force)
 def get_df(version: str, force: bool = False) -> pd.DataFrame:
     """Get a combine ComplexPortal dataframe."""
     url_base = f"ftp://ftp.ebi.ac.uk/pub/databases/intact/complex/{version}/complextab"
@@ -222,29 +227,24 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]:
         taxonomy_name,
         members,
     ) in it:
-        synonyms = [Synonym(name=alias) for alias in aliases]
-        _xrefs = []
-        provenance = []
+        term = Term(
+            reference=Reference(prefix=PREFIX, identifier=complexportal_id, name=name),
+            definition=definition.strip() if pd.notna(definition) else None,
+            synonyms=[Synonym(name=alias) for alias in aliases],
+        )
         for reference, note in xrefs:
             if note == "identity":
-                _xrefs.append(reference)
+                term.append_xref(reference)
             elif note == "see-also" and reference.prefix == "pubmed":
-                provenance.append(reference)
+                term.append_provenance(reference)
             elif (note, reference.prefix) not in unhandled_xref_type:
                 logger.debug(f"unhandled xref type: {note} / {reference.prefix}")
                 unhandled_xref_type.add((note, reference.prefix))
-        term = Term(
-            reference=Reference(prefix=PREFIX, identifier=complexportal_id, name=name),
-            definition=definition.strip() if pd.notna(definition) else None,
-            synonyms=synonyms,
-            xrefs=_xrefs,
-            provenance=provenance,
-        )
         term.set_species(identifier=taxonomy_id, name=taxonomy_name)
         for reference, _count in members:
-            term.append_relationship(has_part, reference)
+            term.annotate_object(has_part, reference)
         yield term

pyobo/sources/conso.py CHANGED Viewed

@@ -4,7 +4,7 @@ from collections.abc import Iterable
 import pandas as pd
-from ..struct import Obo, Reference, Synonym, Term
+from ..struct import Obo, Reference, Synonym, Term, _parse_str_or_curie_or_uri, has_citation
 from ..utils.io import multidict
 from ..utils.path import ensure_df
@@ -25,36 +25,28 @@ class CONSOGetter(Obo):
     ontology = PREFIX
     dynamic_version = True
+    typedefs = [has_citation]
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
         """Iterate over terms in the ontology."""
         return iter_terms()
-def get_obo() -> Obo:
-    """Get CONSO as OBO."""
-    return CONSOGetter()
 def iter_terms() -> Iterable[Term]:
     """Get CONSO terms."""
     terms_df = ensure_df(PREFIX, url=TERMS_URL)
     synonyms_df = ensure_df(PREFIX, url=SYNONYMS_URL)
     synonyms_df["reference"] = synonyms_df["reference"].map(
-        lambda s: [Reference.from_curie(s)] if pd.notna(s) and s != "?" else [],
+        lambda s: [_parse_str_or_curie_or_uri(s)] if pd.notna(s) and s != "?" else [],
     )
-    synonyms_df["specificity"] = synonyms_df["specificity"].map(
-        lambda s: "EXACT" if pd.isna(s) or s == "?" else s
-    )
     synonyms = multidict(
         (
             identifier,
             Synonym(
                 name=synonym,
                 provenance=provenance,
-                specificity=specificity,
+                specificity=None if pd.isna(specificity) or specificity == "?" else specificity,
             ),
         )
         for identifier, synonym, provenance, specificity in synonyms_df.values
@@ -66,21 +58,21 @@ def iter_terms() -> Iterable[Term]:
     for _, row in terms_df.iterrows():
         if row["Name"] == "WITHDRAWN":
             continue
-        provenance: list[Reference] = []
-        for curie in row["References"].split(","):
-            curie = curie.strip()
-            if not curie:
-                continue
-            reference = Reference.from_curie(curie)
-            if reference is not None:
-                provenance.append(reference)
         identifier = row["Identifier"]
-        yield Term(
+        term = Term(
             reference=Reference(prefix=PREFIX, identifier=identifier, name=row["Name"]),
             definition=row["Description"],
-            provenance=provenance,
             synonyms=synonyms.get(identifier, []),
         )
+        for curie in row["References"].split(","):
+            curie = curie.strip()
+            if not curie:
+                continue
+            reference = _parse_str_or_curie_or_uri(curie)
+            if reference is not None:
+                term.append_provenance(reference)
+        yield term
 if __name__ == "__main__":

pyobo/sources/cpt.py CHANGED Viewed

File without changes

pyobo/sources/credit.py CHANGED Viewed

@@ -23,20 +23,12 @@ class CreditGetter(Obo):
     ontology = PREFIX
     static_version = "2022"
-    idspaces = {
-        PREFIX: "https://credit.niso.org/contributor-roles/",
-    }
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
         """Iterate over terms in the ontology."""
         return get_terms(force=force)
-def get_obo(force: bool = False) -> Obo:
-    """Get RGD as OBO."""
-    return CreditGetter(force=force)
 def get_terms(force: bool = False) -> list[Term]:
     """Get terms from the Contributor Roles Taxonomy via GitHub."""
     path = ensure_path(PREFIX, url=url, name="picklist-api.json", force=force)
@@ -65,4 +57,4 @@ def get_terms(force: bool = False) -> list[Term]:
 if __name__ == "__main__":
-    get_obo(force=True).write_default(write_obo=True)
+    CreditGetter.cli()

pyobo/sources/cvx.py CHANGED Viewed

@@ -1,11 +1,13 @@
 """Converter for CVX."""
+import re
 from collections import defaultdict
 from collections.abc import Iterable
 import pandas as pd
-from pyobo import Obo, Reference, Term
+from pyobo import Obo, Reference, Term, TypeDef, default_reference
+from pyobo.struct.struct import acronym
 __all__ = [
     "CVXGetter",
@@ -13,6 +15,12 @@ __all__ = [
 cvx_url = "https://www2a.cdc.gov/vaccines/iis/iisstandards/downloads/cvx.txt"
 PREFIX = "cvx"
+STATUS = TypeDef(
+    reference=default_reference(PREFIX, "status", name="has status"), is_metadata_tag=True
+)
+NONVACCINE = TypeDef(reference=default_reference(PREFIX, "nonvaccine"), is_metadata_tag=True)
+ACRONYM_RE = re.compile("^[A-Z]+$")
 class CVXGetter(Obo):
@@ -20,6 +28,8 @@ class CVXGetter(Obo):
     ontology = PREFIX
     dynamic_version = True
+    synonym_typedefs = [acronym]
+    typedefs = [STATUS, NONVACCINE]
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
         """Iterate over terms in the ontology."""
@@ -71,8 +81,20 @@ def iter_terms() -> Iterable[Term]:
             reference=Reference(prefix=PREFIX, identifier=cvx, name=full_name),
             is_obsolete=is_obsolete,
         )
-        if short_name != full_name:
-            term.append_synonym(short_name)
+        if (
+            short_name.casefold()
+            == full_name.casefold()
+            .replace("virus vaccine", "")
+            .replace("vaccine", "")
+            .replace("  ", " ")
+            .strip()
+        ):
+            pass
+        elif short_name != full_name:
+            if ACRONYM_RE.match(short_name):
+                term.append_exact_synonym(short_name, type=acronym.reference)
+            else:
+                term.append_synonym(short_name)
         if pd.notna(notes):
             term.append_comment(notes)
         if is_obsolete:
@@ -80,9 +102,9 @@ def iter_terms() -> Iterable[Term]:
             if replacement_identifier:
                 term.append_replaced_by(Reference(prefix=PREFIX, identifier=replacement_identifier))
         if pd.notna(status):
-            term.append_property("status", status)
+            term.annotate_string(STATUS, status)
         if pd.notna(nonvaccine):
-            term.append_property("nonvaccine", nonvaccine)
+            term.annotate_boolean(NONVACCINE, nonvaccine)
         terms[cvx] = term
     for child, parents in dd.items():

pyobo/sources/depmap.py CHANGED Viewed

@@ -1,7 +1,6 @@
 """DepMap cell lines."""
 from collections.abc import Iterable
-from typing import Optional
 import pandas as pd
 import pystow
@@ -10,7 +9,6 @@ from pyobo import Obo, Reference, Term
 from pyobo.struct.typedef import exact_match
 __all__ = [
-    "get_obo",
     "DepMapGetter",
 ]
@@ -30,28 +28,24 @@ class DepMapGetter(Obo):
         return iter_terms(version=self._version_or_raise, force=force)
-def get_obo(*, force: bool = False) -> Obo:
-    """Get DepMap cell lines as OBO."""
-    return DepMapGetter(force=force)
-def get_url(version: Optional[str] = None) -> str:
+def get_url(version: str | None = None) -> str:
     """Get the URL for the given version of the DepMap cell line metadata file.
     :param version: The version of the data
     :returns: The URL as a string for downloading the dat
     .. warning::
-        This does not currently take the version into account. Need to write a crawler since data is not easy
-        to access.
+        This does not currently take the version into account. Need to write a crawler
+        since data is not easy to access.
     """
     #: This is the DepMap Public 21Q2 version. There isn't a way to do this automatically without writing a crawler
     url = "https://ndownloader.figshare.com/files/27902376"
     return url
-def _fix_mangled_int(x: str) -> Optional[str]:
+def _fix_mangled_int(x: str) -> str | None:
     return str(int(float(x))) if pd.notna(x) else None
@@ -74,7 +68,10 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
         columns
     ].values:
         if pd.isna(name):
-            name = None
+            if pd.notna(sname):
+                name, sname = sname, None
+            else:
+                name = None
         term = Term.from_triple(PREFIX, identifier, name)
         if pd.notna(sname):
             term.append_synonym(sname)

pyobo/sources/dictybase_gene.py CHANGED Viewed

@@ -9,7 +9,7 @@ from collections.abc import Iterable
 import pandas as pd
 from tqdm.auto import tqdm
-from pyobo.struct import Obo, Synonym, Term, from_species, has_gene_product
+from pyobo.struct import Obo, Term, from_species, has_gene_product
 from pyobo.utils.path import ensure_df
 __all__ = [
@@ -41,11 +41,6 @@ class DictybaseGetter(Obo):
         return get_terms(force=force)
-def get_obo(force: bool = False) -> Obo:
-    """Get dictyBase Gene as OBO."""
-    return DictybaseGetter(force=force)
 def get_terms(force: bool = False) -> Iterable[Term]:
     """Get terms."""
     # TODO the mappings file has actually no uniprot at all, and requires text mining
@@ -67,7 +62,7 @@ def get_terms(force: bool = False) -> Iterable[Term]:
                 term.append_synonym(synonym.strip())
         if synonyms and pd.notna(synonyms):
             for synonym in synonyms.split(","):
-                term.append_synonym(Synonym(synonym.strip()))
+                term.append_synonym(synonym.strip())
         # for uniprot_id in uniprot_mappings.get(identifier, []):
         #     if not uniprot_id or pd.isna(uniprot_id) or uniprot_id in {"unknown", "pseudogene"}:
         #         continue

pyobo/sources/drugbank/__init__.py ADDED Viewed

@@ -0,0 +1,9 @@
+"""Resources from DrugBank."""
+from .drugbank import DrugBankGetter
+from .drugbank_salt import DrugBankSaltGetter
+__all__ = [
+    "DrugBankGetter",
+    "DrugBankSaltGetter",
+]

pyobo/sources/{drugbank.py → drugbank/drugbank.py} RENAMED Viewed

@@ -8,17 +8,17 @@ import itertools as itt
 import logging
 from collections.abc import Iterable, Mapping
 from functools import lru_cache
-from typing import Any, Optional
+from typing import Any
 from xml.etree import ElementTree
 import pystow
 from tqdm.auto import tqdm
-from ..getters import NoBuildError
-from ..struct import Obo, Reference, Term
-from ..struct.typedef import has_inchi, has_salt, has_smiles
-from ..utils.cache import cached_pickle
-from ..utils.path import prefix_directory_join
+from ...getters import NoBuildError
+from ...struct import Obo, Reference, Term
+from ...struct.typedef import has_inchi, has_salt, has_smiles
+from ...utils.cache import cached_pickle
+from ...utils.path import prefix_directory_join
 __all__ = [
     "DrugBankGetter",
@@ -40,11 +40,6 @@ class DrugBankGetter(Obo):
         return iter_terms(version=self._version_or_raise, force=force)
-def get_obo(force: bool = False) -> Obo:
-    """Get DrugBank as OBO."""
-    return DrugBankGetter(force=force)
 def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
     """Iterate over DrugBank terms in OBO."""
     for drug_info in iterate_drug_info(version, force=force):
@@ -120,13 +115,13 @@ def _make_term(drug_info: Mapping[str, Any]) -> Term:
         if identifier:
             term.append_xref(Reference(prefix=xref_prefix, identifier=identifier))
-    for prop, debio_curie in [("smiles", has_smiles), ("inchi", has_inchi)]:
-        identifier = drug_info.get(prop)
+    for key, typedef_ in [("smiles", has_smiles), ("inchi", has_inchi)]:
+        identifier = drug_info.get(key)
         if identifier:
-            term.append_property(debio_curie, identifier)
+            term.annotate_string(typedef_, identifier)
     for salt in drug_info.get("salts", []):
-        term.append_relationship(
+        term.annotate_object(
             has_salt,
             Reference(
                 prefix="drugbank.salt",
@@ -139,7 +134,7 @@ def _make_term(drug_info: Mapping[str, Any]) -> Term:
 @lru_cache
-def get_xml_root(version: Optional[str] = None) -> ElementTree.Element:
+def get_xml_root(version: str | None = None) -> ElementTree.Element:
     """Get the DrugBank XML parser root.
     Takes between 35-60 seconds.

pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} RENAMED Viewed

@@ -2,8 +2,8 @@
 Run with ``python -m pyobo.sources.drugbank_salt``
-Get relations between drugbank salts and drugbank parents with
-``pyobo relations drugbank --relation obo:has_salt`` or
+Get relations between drugbank salts and drugbank parents with ``pyobo relations
+drugbank --relation obo:has_salt`` or
 .. code-block:: python
@@ -16,7 +16,7 @@ import logging
 from collections.abc import Iterable
 from .drugbank import iterate_drug_info
-from ..struct import Obo, Reference, Term
+from ...struct import Obo, Reference, Term
 __all__ = [
     "DrugBankSaltGetter",
@@ -38,11 +38,6 @@ class DrugBankSaltGetter(Obo):
         return iter_terms(version=self._version_or_raise, force=force)
-def get_obo(force: bool = False) -> Obo:
-    """Get DrugBank Salts as OBO."""
-    return DrugBankSaltGetter(force=force)
 def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
     """Iterate over DrugBank Salt terms in OBO."""
     for drug_info in iterate_drug_info(version, force=force):

pyobo/sources/drugcentral.py CHANGED Viewed

@@ -7,6 +7,7 @@ from contextlib import closing
 import bioregistry
 import psycopg2
+from pydantic import ValidationError
 from tqdm.auto import tqdm
 from pyobo.struct import Obo, Reference, Synonym, Term
@@ -32,18 +33,13 @@ class DrugCentralGetter(Obo):
     """An ontology representation of the DrugCentral database."""
     ontology = bioversions_key = PREFIX
-    typedefs = [exact_match]
+    typedefs = [exact_match, has_inchi, has_smiles]
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
         """Iterate over terms in the ontology."""
         return iter_terms()
-def get_obo(force: bool = False) -> Obo:
-    """Get DrugCentral OBO."""
-    return DrugCentralGetter(force=force)
 def iter_terms() -> Iterable[Term]:
     """Iterate over DrugCentral terms."""
     with closing(psycopg2.connect(**PARAMS)) as conn:
@@ -71,10 +67,18 @@ def iter_terms() -> Iterable[Term]:
                 if xref_prefix_norm == "pdb.ligand":
                     # there is a weird invalid escaped \W appearing in pdb ligand ids
                     identifier = identifier.strip()
-                identifier = bioregistry.standardize_identifier(xref_prefix_norm, identifier)
-                xrefs[str(drugcentral_id)].append(
-                    Reference(prefix=xref_prefix_norm, identifier=identifier)
-                )
+                try:
+                    xref = Reference(prefix=xref_prefix_norm, identifier=identifier)
+                except ValidationError:
+                    # TODO mmsl is systematically incorrect, figure this out
+                    if xref_prefix_norm != "mmsl":
+                        tqdm.write(
+                            f"[drugcentral:{drugcentral_id}] had invalid xref: {prefix}:{identifier}"
+                        )
+                    continue
+                else:
+                    xrefs[str(drugcentral_id)].append(xref)
         with closing(conn.cursor()) as cur:
             cur.execute("SELECT id, name FROM public.synonyms")
             synonyms: defaultdict[str, list[Synonym]] = defaultdict(list)
@@ -85,16 +89,16 @@ def iter_terms() -> Iterable[Term]:
         drugcentral_id = str(drugcentral_id)
         term = Term(
             reference=Reference(prefix=PREFIX, identifier=drugcentral_id, name=name),
-            definition=definition,
+            definition=definition.replace("\n", " ") if definition else None,
             synonyms=synonyms.get(drugcentral_id, []),
             xrefs=xrefs.get(drugcentral_id, []),
         )
         if inchi_key:
             term.append_exact_match(Reference(prefix="inchikey", identifier=inchi_key))
         if smiles:
-            term.append_property(has_smiles, smiles)
+            term.annotate_string(has_smiles, smiles)
         if inchi:
-            term.append_property(has_inchi, inchi)
+            term.annotate_string(has_inchi, inchi)
         if cas:
             term.append_exact_match(Reference(prefix="cas", identifier=cas))
         yield term

pyobo 0.11.2__py3-none-any.whl → 0.12.0__py3-none-any.whl

pyobo 0.11.2__py3-none-any.whl → 0.12.0__py3-none-any.whl