PyPI - pyobo - Versions diffs - 0.11.2__py3-none-any.whl → 0.12.0__py3-none-any.whl - Mend

pyobo 0.11.2__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (227) hide show

pyobo/.DS_Store +0 -0
pyobo/__init__.py +95 -20
pyobo/__main__.py +0 -0
pyobo/api/__init__.py +81 -10
pyobo/api/alts.py +52 -42
pyobo/api/combine.py +39 -0
pyobo/api/edges.py +68 -0
pyobo/api/hierarchy.py +231 -203
pyobo/api/metadata.py +14 -19
pyobo/api/names.py +207 -127
pyobo/api/properties.py +117 -113
pyobo/api/relations.py +68 -94
pyobo/api/species.py +24 -21
pyobo/api/typedefs.py +11 -11
pyobo/api/utils.py +66 -13
pyobo/api/xrefs.py +108 -114
pyobo/cli/__init__.py +0 -0
pyobo/cli/cli.py +35 -50
pyobo/cli/database.py +183 -161
pyobo/{xrefdb/xrefs_pipeline.py → cli/database_utils.py} +54 -73
pyobo/cli/lookup.py +163 -195
pyobo/cli/utils.py +19 -6
pyobo/constants.py +102 -3
pyobo/getters.py +196 -118
pyobo/gilda_utils.py +79 -200
pyobo/identifier_utils/__init__.py +41 -0
pyobo/identifier_utils/api.py +296 -0
pyobo/identifier_utils/model.py +130 -0
pyobo/identifier_utils/preprocessing.json +812 -0
pyobo/identifier_utils/preprocessing.py +61 -0
pyobo/identifier_utils/relations/__init__.py +8 -0
pyobo/identifier_utils/relations/api.py +162 -0
pyobo/identifier_utils/relations/data.json +5824 -0
pyobo/identifier_utils/relations/data_owl.json +57 -0
pyobo/identifier_utils/relations/data_rdf.json +1 -0
pyobo/identifier_utils/relations/data_rdfs.json +7 -0
pyobo/mocks.py +9 -6
pyobo/ner/__init__.py +9 -0
pyobo/ner/api.py +72 -0
pyobo/ner/normalizer.py +33 -0
pyobo/obographs.py +43 -39
pyobo/plugins.py +5 -4
pyobo/py.typed +0 -0
pyobo/reader.py +1358 -395
pyobo/reader_utils.py +155 -0
pyobo/resource_utils.py +42 -22
pyobo/resources/__init__.py +0 -0
pyobo/resources/goc.py +75 -0
pyobo/resources/goc.tsv +188 -0
pyobo/resources/ncbitaxon.py +4 -5
pyobo/resources/ncbitaxon.tsv.gz +0 -0
pyobo/resources/ro.py +3 -2
pyobo/resources/ro.tsv +0 -0
pyobo/resources/so.py +0 -0
pyobo/resources/so.tsv +0 -0
pyobo/sources/README.md +12 -8
pyobo/sources/__init__.py +52 -29
pyobo/sources/agrovoc.py +0 -0
pyobo/sources/antibodyregistry.py +11 -12
pyobo/sources/bigg/__init__.py +13 -0
pyobo/sources/bigg/bigg_compartment.py +81 -0
pyobo/sources/bigg/bigg_metabolite.py +229 -0
pyobo/sources/bigg/bigg_model.py +46 -0
pyobo/sources/bigg/bigg_reaction.py +77 -0
pyobo/sources/biogrid.py +1 -2
pyobo/sources/ccle.py +7 -12
pyobo/sources/cgnc.py +0 -5
pyobo/sources/chebi.py +1 -1
pyobo/sources/chembl/__init__.py +9 -0
pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
pyobo/sources/chembl/chembl_target.py +160 -0
pyobo/sources/civic_gene.py +55 -15
pyobo/sources/clinicaltrials.py +160 -0
pyobo/sources/complexportal.py +24 -24
pyobo/sources/conso.py +14 -22
pyobo/sources/cpt.py +0 -0
pyobo/sources/credit.py +1 -9
pyobo/sources/cvx.py +27 -5
pyobo/sources/depmap.py +9 -12
pyobo/sources/dictybase_gene.py +2 -7
pyobo/sources/drugbank/__init__.py +9 -0
pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
pyobo/sources/drugcentral.py +17 -13
pyobo/sources/expasy.py +31 -34
pyobo/sources/famplex.py +13 -18
pyobo/sources/flybase.py +3 -8
pyobo/sources/gard.py +62 -0
pyobo/sources/geonames/__init__.py +9 -0
pyobo/sources/geonames/features.py +28 -0
pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
pyobo/sources/geonames/utils.py +115 -0
pyobo/sources/gmt_utils.py +6 -7
pyobo/sources/go.py +20 -13
pyobo/sources/gtdb.py +154 -0
pyobo/sources/gwascentral/__init__.py +9 -0
pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
pyobo/sources/hgnc/__init__.py +9 -0
pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
pyobo/sources/icd/__init__.py +9 -0
pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
pyobo/sources/icd/icd11.py +148 -0
pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
pyobo/sources/interpro.py +4 -9
pyobo/sources/itis.py +0 -5
pyobo/sources/kegg/__init__.py +0 -0
pyobo/sources/kegg/api.py +16 -38
pyobo/sources/kegg/genes.py +9 -20
pyobo/sources/kegg/genome.py +1 -7
pyobo/sources/kegg/pathway.py +9 -21
pyobo/sources/mesh.py +58 -24
pyobo/sources/mgi.py +3 -10
pyobo/sources/mirbase/__init__.py +11 -0
pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
pyobo/sources/msigdb.py +74 -39
pyobo/sources/ncbi/__init__.py +9 -0
pyobo/sources/ncbi/ncbi_gc.py +162 -0
pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
pyobo/sources/nih_reporter.py +60 -0
pyobo/sources/nlm/__init__.py +9 -0
pyobo/sources/nlm/nlm_catalog.py +48 -0
pyobo/sources/nlm/nlm_publisher.py +36 -0
pyobo/sources/nlm/utils.py +116 -0
pyobo/sources/npass.py +6 -8
pyobo/sources/omim_ps.py +10 -3
pyobo/sources/pathbank.py +4 -8
pyobo/sources/pfam/__init__.py +9 -0
pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
pyobo/sources/pharmgkb/__init__.py +15 -0
pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
pyobo/sources/pharmgkb/utils.py +86 -0
pyobo/sources/pid.py +1 -6
pyobo/sources/pombase.py +6 -10
pyobo/sources/pubchem.py +4 -9
pyobo/sources/reactome.py +5 -11
pyobo/sources/rgd.py +11 -16
pyobo/sources/rhea.py +37 -36
pyobo/sources/ror.py +69 -42
pyobo/sources/selventa/__init__.py +0 -0
pyobo/sources/selventa/schem.py +4 -7
pyobo/sources/selventa/scomp.py +1 -6
pyobo/sources/selventa/sdis.py +4 -7
pyobo/sources/selventa/sfam.py +1 -6
pyobo/sources/sgd.py +6 -11
pyobo/sources/signor/__init__.py +7 -0
pyobo/sources/signor/download.py +41 -0
pyobo/sources/signor/signor_complexes.py +105 -0
pyobo/sources/slm.py +12 -15
pyobo/sources/umls/__init__.py +7 -1
pyobo/sources/umls/__main__.py +0 -0
pyobo/sources/umls/get_synonym_types.py +20 -4
pyobo/sources/umls/sty.py +57 -0
pyobo/sources/umls/synonym_types.tsv +1 -1
pyobo/sources/umls/umls.py +18 -22
pyobo/sources/unimod.py +46 -0
pyobo/sources/uniprot/__init__.py +1 -1
pyobo/sources/uniprot/uniprot.py +40 -32
pyobo/sources/uniprot/uniprot_ptm.py +4 -34
pyobo/sources/utils.py +3 -2
pyobo/sources/wikipathways.py +7 -10
pyobo/sources/zfin.py +5 -10
pyobo/ssg/__init__.py +12 -16
pyobo/ssg/base.html +0 -0
pyobo/ssg/index.html +26 -13
pyobo/ssg/term.html +12 -2
pyobo/ssg/typedef.html +0 -0
pyobo/struct/__init__.py +54 -8
pyobo/struct/functional/__init__.py +1 -0
pyobo/struct/functional/dsl.py +2572 -0
pyobo/struct/functional/macros.py +423 -0
pyobo/struct/functional/obo_to_functional.py +385 -0
pyobo/struct/functional/ontology.py +270 -0
pyobo/struct/functional/utils.py +112 -0
pyobo/struct/reference.py +331 -136
pyobo/struct/struct.py +1413 -643
pyobo/struct/struct_utils.py +1078 -0
pyobo/struct/typedef.py +162 -210
pyobo/struct/utils.py +12 -5
pyobo/struct/vocabulary.py +138 -0
pyobo/utils/__init__.py +0 -0
pyobo/utils/cache.py +13 -11
pyobo/utils/io.py +17 -31
pyobo/utils/iter.py +5 -5
pyobo/utils/misc.py +41 -53
pyobo/utils/ndex_utils.py +0 -0
pyobo/utils/path.py +76 -70
pyobo/version.py +3 -3
{pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/METADATA +228 -229
pyobo-0.12.0.dist-info/RECORD +202 -0
pyobo-0.12.0.dist-info/WHEEL +4 -0
{pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/entry_points.txt +1 -0
pyobo-0.12.0.dist-info/licenses/LICENSE +21 -0
pyobo/aws.py +0 -162
pyobo/cli/aws.py +0 -47
pyobo/identifier_utils.py +0 -142
pyobo/normalizer.py +0 -232
pyobo/registries/__init__.py +0 -16
pyobo/registries/metaregistry.json +0 -507
pyobo/registries/metaregistry.py +0 -135
pyobo/sources/icd11.py +0 -105
pyobo/xrefdb/__init__.py +0 -1
pyobo/xrefdb/canonicalizer.py +0 -214
pyobo/xrefdb/priority.py +0 -59
pyobo/xrefdb/sources/__init__.py +0 -60
pyobo/xrefdb/sources/biomappings.py +0 -36
pyobo/xrefdb/sources/cbms2019.py +0 -91
pyobo/xrefdb/sources/chembl.py +0 -83
pyobo/xrefdb/sources/compath.py +0 -82
pyobo/xrefdb/sources/famplex.py +0 -64
pyobo/xrefdb/sources/gilda.py +0 -50
pyobo/xrefdb/sources/intact.py +0 -113
pyobo/xrefdb/sources/ncit.py +0 -133
pyobo/xrefdb/sources/pubchem.py +0 -27
pyobo/xrefdb/sources/wikidata.py +0 -116
pyobo-0.11.2.dist-info/RECORD +0 -157
pyobo-0.11.2.dist-info/WHEEL +0 -5
pyobo-0.11.2.dist-info/top_level.txt +0 -1

pyobo/sources/reactome.py CHANGED Viewed

@@ -11,7 +11,7 @@ from tqdm.auto import tqdm
 from ..api import get_id_multirelations_mapping
 from ..constants import SPECIES_REMAPPING
 from ..resources.ncbitaxon import get_ncbitaxon_id
-from ..struct import Obo, Reference, Term, from_species, has_participant
+from ..struct import Obo, Reference, Term, from_species, has_citation, has_participant
 from ..utils.io import multidict
 from ..utils.path import ensure_df
@@ -31,18 +31,13 @@ class ReactomeGetter(Obo):
     """An ontology representation of the Reactome pathway database."""
     ontology = bioversions_key = PREFIX
-    typedefs = [from_species, has_participant]
+    typedefs = [from_species, has_participant, has_citation]
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
         """Iterate over terms in the ontology."""
         return iter_terms(version=self._version_or_raise, force=force)
-def get_obo(force: bool = False) -> Obo:
-    """Get Reactome OBO."""
-    return ReactomeGetter(force=force)
 def ensure_participant_df(version: str, force: bool = False) -> pd.DataFrame:
     """Get the pathway uniprot participant dataframe."""
     uniprot_pathway_url = f"https://reactome.org/download/{version}/UniProt2Reactome_All_Levels.txt"
@@ -76,11 +71,10 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
     for reactome_id, name, species_name, taxonomy_id in it:
         terms[reactome_id] = term = Term(
             reference=Reference(prefix=PREFIX, identifier=reactome_id, name=name),
-            provenance=[
-                Reference(prefix="pubmed", identifier=pubmed_id)
-                for pubmed_id in provenance_d.get(reactome_id, [])
-            ],
         )
+        for pubmed_id in provenance_d.get(reactome_id, []):
+            term.append_provenance(Reference(prefix="pubmed", identifier=pubmed_id))
         if not taxonomy_id or pd.isna(taxonomy_id):
             raise ValueError(f"unmapped species: {species_name}")

pyobo/sources/rgd.py CHANGED Viewed

@@ -2,7 +2,6 @@
 import logging
 from collections.abc import Iterable
-from typing import Optional
 import pandas as pd
 from tqdm.auto import tqdm
@@ -10,10 +9,11 @@ from tqdm.auto import tqdm
 from pyobo.struct import (
     Obo,
     Reference,
-    Synonym,
     SynonymTypeDef,
     Term,
+    default_reference,
     from_species,
+    has_citation,
     has_gene_product,
     transcribes_to,
 )
@@ -22,8 +22,8 @@ from pyobo.utils.path import ensure_df
 logger = logging.getLogger(__name__)
 PREFIX = "rgd"
-old_symbol_type = SynonymTypeDef.from_text("old_symbol")
-old_name_type = SynonymTypeDef.from_text("old_name")
+old_symbol_type = SynonymTypeDef(reference=default_reference(PREFIX, "old_symbol"))
+old_name_type = SynonymTypeDef(reference=default_reference(PREFIX, "old_name"))
 # NOTE unigene id was discontinue in January 18th, 2021 dump
@@ -73,7 +73,7 @@ class RGDGetter(Obo):
     """An ontology representation of RGD's rat gene nomenclature."""
     bioversions_key = ontology = PREFIX
-    typedefs = [from_species, transcribes_to, has_gene_product]
+    typedefs = [from_species, transcribes_to, has_gene_product, has_citation]
     synonym_typedefs = [old_name_type, old_symbol_type]
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
@@ -81,11 +81,6 @@ class RGDGetter(Obo):
         return get_terms(force=force, version=self._version_or_raise)
-def get_obo(force: bool = False) -> Obo:
-    """Get RGD as OBO."""
-    return RGDGetter(force=force)
 namespace_to_column = [
     ("ensembl", "ENSEMBL_ID"),
     ("uniprot", "UNIPROT_ID"),
@@ -93,7 +88,7 @@ namespace_to_column = [
 ]
-def get_terms(force: bool = False, version: Optional[str] = None) -> Iterable[Term]:
+def get_terms(force: bool = False, version: str | None = None) -> Iterable[Term]:
     """Get RGD terms."""
     df = ensure_df(
         PREFIX,
@@ -124,11 +119,11 @@ def get_terms(force: bool = False, version: Optional[str] = None) -> Iterable[Te
         old_names = row["OLD_NAME"]
         if old_names and pd.notna(old_names):
             for old_name in old_names.split(";"):
-                term.append_synonym(Synonym(name=old_name, type=old_name_type))
+                term.append_synonym(old_name, type=old_name_type)
         old_symbols = row["OLD_SYMBOL"]
         if old_symbols and pd.notna(old_symbols):
             for old_symbol in old_symbols.split(";"):
-                term.append_synonym(Synonym(name=old_symbol, type=old_symbol_type))
+                term.append_synonym(old_symbol, type=old_symbol_type)
         for prefix, key in namespace_to_column:
             xref_ids = str(row[key])
             if xref_ids and pd.notna(xref_ids):
@@ -136,7 +131,7 @@ def get_terms(force: bool = False, version: Optional[str] = None) -> Iterable[Te
                     if xref_id == "nan":
                         continue
                     if prefix == "uniprot":
-                        term.append_relationship(
+                        term.annotate_object(
                             has_gene_product, Reference(prefix=prefix, identifier=xref_id)
                         )
                     elif prefix == "ensembl":
@@ -144,11 +139,11 @@ def get_terms(force: bool = False, version: Optional[str] = None) -> Iterable[Te
                             # second one is reverse strand
                             term.append_xref(Reference(prefix=prefix, identifier=xref_id))
                         elif xref_id.startswith("ENSMUST"):
-                            term.append_relationship(
+                            term.annotate_object(
                                 transcribes_to, Reference(prefix=prefix, identifier=xref_id)
                             )
                         elif xref_id.startswith("ENSMUSP"):
-                            term.append_relationship(
+                            term.annotate_object(
                                 has_gene_product, Reference(prefix=prefix, identifier=xref_id)
                             )
                         else:

pyobo/sources/rhea.py CHANGED Viewed

@@ -2,23 +2,13 @@
 import logging
 from collections.abc import Iterable
-from typing import TYPE_CHECKING, Optional
+from typing import TYPE_CHECKING, Any, cast
 import pystow
 from pyobo.api.utils import get_version
-from pyobo.struct import Obo, Reference, Term
-from pyobo.struct.typedef import (
-    TypeDef,
-    enabled_by,
-    has_bidirectional_reaction,
-    has_input,
-    has_left_to_right_reaction,
-    has_output,
-    has_participant,
-    has_right_to_left_reaction,
-    reaction_enabled_by_molecular_function,
-)
+from pyobo.struct import Obo, Reference, Term, TypeDef
+from pyobo.struct import typedef as v
 from pyobo.utils.path import ensure_df
 if TYPE_CHECKING:
@@ -32,6 +22,16 @@ logger = logging.getLogger(__name__)
 PREFIX = "rhea"
 RHEA_RDF_GZ_URL = "ftp://ftp.expasy.org/databases/rhea/rdf/rhea.rdf.gz"
+has_left_to_right_reaction = TypeDef.default(
+    PREFIX, "hasLeftToRightReaction", name="has left to right reaction", is_metadata_tag=True
+).append_xref(v.has_left_to_right_reaction)
+has_right_to_left_reaction = TypeDef.default(
+    PREFIX, "hasRightToLeftReaction", name="has right to left reaction", is_metadata_tag=True
+).append_xref(v.has_right_to_left_reaction)
+has_bidirectional_reaction = TypeDef.default(
+    PREFIX, "hasBidirectionalReaction", name="has bidirectional reaction", is_metadata_tag=True
+).append_xref(v.has_bidirectional_reaction)
 class RheaGetter(Obo):
     """An ontology representation of Rhea's chemical reaction database."""
@@ -41,11 +41,11 @@ class RheaGetter(Obo):
         has_left_to_right_reaction,
         has_bidirectional_reaction,
         has_right_to_left_reaction,
-        enabled_by,
-        has_input,
-        has_output,
-        has_participant,
-        reaction_enabled_by_molecular_function,
+        v.enabled_by,
+        v.has_input,
+        v.has_output,
+        v.has_participant,
+        v.reaction_enabled_by_molecular_function,
     ]
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
@@ -53,16 +53,13 @@ class RheaGetter(Obo):
         return iter_terms(version=self._version_or_raise, force=force)
-def get_obo(force: bool = False) -> Obo:
-    """Get Rhea as OBO."""
-    return RheaGetter(force=force)
-def ensure_rhea_rdf(version: Optional[str] = None, force: bool = False) -> "rdflib.Graph":
+def ensure_rhea_rdf(version: str | None = None, force: bool = False) -> "rdflib.Graph":
     """Get the Rhea RDF graph."""
     # see docs: https://ftp.expasy.org/databases/rhea/rdf/rhea_rdf_documentation.pdf
     if version is None:
         version = get_version(PREFIX)
+    if version is None:
+        raise ValueError
     return pystow.ensure_rdf(
         "pyobo",
         "raw",
@@ -100,7 +97,10 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
         }
     """
     )
-    names = {str(identifier): str(name) for _, identifier, name in result}
+    names = {
+        str(identifier): str(name)
+        for _, identifier, name in cast(Iterable[tuple[Any, str, str]], result)
+    }
     terms: dict[str, Term] = {}
     master_to_left: dict[str, str] = {}
@@ -145,8 +145,9 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
       ?compound rh:chebi|rh:underlyingChebi|(rh:reactivePart/rh:chebi) ?chebi .
     }
     """
-    for master_rhea_id, side_uri, chebi_uri in graph.query(sparql):
-        master_rhea_id = str(master_rhea_id)
+    results = cast(Iterable[tuple[int, str, str]], graph.query(sparql))
+    for master_rhea_id_int, side_uri, chebi_uri in results:
+        master_rhea_id = str(master_rhea_id_int)
         chebi_reference = Reference(
             prefix="chebi", identifier=chebi_uri[len("http://purl.obolibrary.org/obo/CHEBI_") :]
         )
@@ -159,10 +160,10 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
             right_rhea_id = master_to_left[master_rhea_id]
         else:
             raise ValueError(f"Invalid side: {side_uri}")
-        terms[master_rhea_id].append_relationship(has_participant, chebi_reference)
-        terms[master_to_bi[master_rhea_id]].append_relationship(has_participant, chebi_reference)
-        terms[left_rhea_id].append_relationship(has_input, chebi_reference)
-        terms[right_rhea_id].append_relationship(has_output, chebi_reference)
+        terms[master_rhea_id].annotate_object(v.has_participant, chebi_reference)
+        terms[master_to_bi[master_rhea_id]].annotate_object(v.has_participant, chebi_reference)
+        terms[left_rhea_id].append_relationship(v.has_input, chebi_reference)
+        terms[right_rhea_id].append_relationship(v.has_output, chebi_reference)
     hierarchy = ensure_df(
         PREFIX,
@@ -181,8 +182,8 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
         ("reactome", "rhea2reactome", None),
         ("macie", "rhea2macie", None),
         ("metacyc", "rhea2metacyc", None),
-        ("go", "rhea2go", reaction_enabled_by_molecular_function),
-        ("uniprot", "rhea2uniprot", enabled_by),
+        ("go", "rhea2go", v.reaction_enabled_by_molecular_function),
+        ("uniprot", "rhea2uniprot", v.enabled_by),
     ]:
         xref_df = ensure_df(
             PREFIX,
@@ -202,7 +203,7 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
                 )
                 continue
             target_reference = Reference(prefix=xref_prefix, identifier=xref_id)
-            if isinstance(relation, TypeDef):
+            if relation is not None:
                 terms[directional_rhea_id].append_relationship(relation, target_reference)
             else:
                 terms[directional_rhea_id].append_xref(target_reference)
@@ -223,11 +224,11 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
         _iubmb,
     ) in ec_df.values:
         terms[directional_rhea_id].append_relationship(
-            enabled_by, Reference(prefix="eccode", identifier=ec)
+            v.enabled_by, Reference(prefix="ec", identifier=ec)
         )
     yield from terms.values()
 if __name__ == "__main__":
-    RheaGetter().write_default(write_obo=True, force=True)
+    RheaGetter.cli(["--owl"])

pyobo/sources/ror.py CHANGED Viewed

@@ -3,16 +3,18 @@
 from __future__ import annotations
 import json
+import logging
 import zipfile
 from collections.abc import Iterable
 from typing import Any
 import bioregistry
 import zenodo_client
+from pydantic import ValidationError
 from tqdm.auto import tqdm
 from pyobo.struct import Obo, Reference, Term
-from pyobo.struct.struct import acronym
+from pyobo.struct.struct import CHARLIE_TERM, HUMAN_TERM, PYOBO_INJECTED, acronym
 from pyobo.struct.typedef import (
     has_homepage,
     has_part,
@@ -23,11 +25,13 @@ from pyobo.struct.typedef import (
     see_also,
 )
+logger = logging.getLogger(__name__)
 PREFIX = "ror"
 ROR_ZENODO_RECORD_ID = "10086202"
 # Constants
-ORG_CLASS = Reference(prefix="OBI", identifier="0000245")
+ORG_CLASS = Reference(prefix="OBI", identifier="0000245", name="organization")
+CITY_CLASS = Reference(prefix="ENVO", identifier="00000856", name="city")
 RMAP = {
     "Related": see_also,
@@ -52,16 +56,7 @@ class RORGetter(Obo):
     ontology = bioregistry_key = PREFIX
     typedefs = [has_homepage, *RMAP.values()]
     synonym_typedefs = [acronym]
-    idspaces = {
-        "ror": "https://ror.org/",
-        "geonames": "https://www.geonames.org/",
-        "ENVO": "http://purl.obolibrary.org/obo/ENVO_",
-        "BFO": "http://purl.obolibrary.org/obo/BFO_",
-        "RO": "http://purl.obolibrary.org/obo/RO_",
-        "OBI": "http://purl.obolibrary.org/obo/OBI_",
-        "OMO": "http://purl.obolibrary.org/obo/OMO_",
-        "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
-    }
+    root_terms = [CITY_CLASS, ORG_CLASS]
     def __post_init__(self):
         self.data_version, _url, _path = _get_info()
@@ -69,26 +64,40 @@ class RORGetter(Obo):
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
         """Iterate over terms in the ontology."""
-        return iterate_ror_terms(force=force)
-ROR_ORGANIZATION_TYPE_TO_OBI = {
-    "Education": ...,
-    "Facility": ...,
-    "Company": ...,
-    "Government": ...,
-    "Healthcare": ...,
-    "Other": ...,
-    "Archive": ...,
+        yield CHARLIE_TERM
+        yield HUMAN_TERM
+        yield Term(reference=ORG_CLASS)
+        yield Term(reference=CITY_CLASS)
+        yield from ROR_ORGANIZATION_TYPE_TO_OBI.values()
+        yield from iterate_ror_terms(force=force)
+ROR_ORGANIZATION_TYPE_TO_OBI: dict[str, Term] = {
+    "Education": Term.default(PREFIX, "education", "educational organization"),
+    "Facility": Term.default(PREFIX, "facility", "facility"),
+    "Company": Term.default(PREFIX, "company", "company"),
+    "Government": Term.default(PREFIX, "government", "government organization"),
+    "Healthcare": Term.default(PREFIX, "healthcare", "healthcare organization"),
+    "Archive": Term.default(PREFIX, "archive", "archival organization"),
+    "Nonprofit": Term.default(PREFIX, "healthcare", "nonprofit organization")
+    .append_xref(Reference(prefix="ICO", identifier="0000048"))
+    .append_xref(Reference(prefix="GSSO", identifier="004615")),
 }
+for _k, v in ROR_ORGANIZATION_TYPE_TO_OBI.items():
+    v.append_parent(ORG_CLASS)
+    v.append_contributor(CHARLIE_TERM)
+    v.append_comment(PYOBO_INJECTED)
 _MISSED_ORG_TYPES: set[str] = set()
 def iterate_ror_terms(*, force: bool = False) -> Iterable[Term]:
     """Iterate over terms in ROR."""
-    version, source_uri, records = get_latest(force=force)
-    unhandled_xref_prefixes = set()
-    for record in tqdm(records, unit_scale=True, unit="record", desc=PREFIX):
+    _version, _source_uri, records = get_latest(force=force)
+    unhandled_xref_prefixes: set[str] = set()
+    seen_geonames_references = set()
+    for record in tqdm(records, unit_scale=True, unit="record", desc=f"{PREFIX} v{_version}"):
         identifier = record["id"].removeprefix("https://ror.org/")
         name = record["name"]
         name = NAME_REMAPPING.get(name, name)
@@ -103,13 +112,14 @@ def iterate_ror_terms(*, force: bool = False) -> Iterable[Term]:
             type="Instance",
             definition=description,
         )
-        term.append_parent(ORG_CLASS)
-        # TODO replace term.append_parent(ORG_CLASS) with:
-        # for organization_type in organization_types:
-        #     term.append_parent(ORG_PARENTS[organization_type])
+        for organization_type in organization_types:
+            if organization_type == "Other":
+                term.append_parent(ORG_CLASS)
+            else:
+                term.append_parent(ROR_ORGANIZATION_TYPE_TO_OBI[organization_type])
         for link in record.get("links", []):
-            term.append_property(has_homepage, link)
+            term.annotate_uri(has_homepage, link)
         if name.startswith("The "):
             term.append_synonym(name.removeprefix("The "))
@@ -120,23 +130,29 @@ def iterate_ror_terms(*, force: bool = False) -> Iterable[Term]:
                 RMAP[relationship["type"]], Reference(prefix=PREFIX, identifier=target_id)
             )
-        term.is_obsolete = record.get("status") != "active"
+        if record.get("status") != "active":
+            term.is_obsolete = True
         for address in record.get("addresses", []):
             city = address.get("geonames_city")
             if not city:
                 continue
-            term.append_relationship(
-                RMAP["Located in"], Reference(prefix="geonames", identifier=str(city["id"]))
+            geonames_reference = Reference(
+                prefix="geonames", identifier=str(city["id"]), name=city["city"]
             )
-        for label in record.get("labels", []):
-            label = label["label"]  # there's a language availabel in this dict too
-            term.append_synonym(label)
+            seen_geonames_references.add(geonames_reference)
+            term.append_relationship(RMAP["Located in"], geonames_reference)
+        for label_dict in record.get("labels", []):
+            label = label_dict["label"]
+            label = label.strip().replace("\n", " ")
+            language = label_dict["iso639"]
+            term.append_synonym(label, language=language)
             if label.startswith("The "):
-                term.append_synonym(label.removeprefix("The "))
+                term.append_synonym(label.removeprefix("The "), language=language)
         for synonym in record.get("aliases", []):
+            synonym = synonym.strip().replace("\n", " ")
             term.append_synonym(synonym)
             if synonym.startswith("The "):
                 term.append_synonym(synonym.removeprefix("The "))
@@ -162,10 +178,21 @@ def iterate_ror_terms(*, force: bool = False) -> Iterable[Term]:
             if isinstance(identifiers, str):
                 identifiers = [identifiers]
             for xref_id in identifiers:
-                term.append_xref(Reference(prefix=norm_prefix, identifier=xref_id.replace(" ", "")))
+                xref_id = xref_id.replace(" ", "")
+                try:
+                    xref = Reference(prefix=norm_prefix, identifier=xref_id)
+                except ValidationError:
+                    tqdm.write(f"[{term.curie}] invalid xref: {norm_prefix}:{xref_id}")
+                else:
+                    term.append_xref(xref)
         yield term
+    for geonames_ref in sorted(seen_geonames_references):
+        geonames_term = Term(reference=geonames_ref, type="Instance")
+        geonames_term.append_parent(CITY_CLASS)
+        yield geonames_term
 def _get_info(*, force: bool = False):
     client = zenodo_client.Zenodo()
@@ -193,7 +220,7 @@ def get_latest(*, force: bool = False):
 def get_ror_to_country_geonames(**kwargs: Any) -> dict[str, str]:
     """Get a mapping of ROR ids to GeoNames IDs for countries."""
-    from pyobo.sources.geonames import get_city_to_country
+    from pyobo.sources.geonames.geonames import get_city_to_country
     city_to_country = get_city_to_country()
     rv = {}
@@ -207,4 +234,4 @@ def get_ror_to_country_geonames(**kwargs: Any) -> dict[str, str]:
 if __name__ == "__main__":
-    RORGetter(force=True).write_default(write_obo=True, force=True)
+    RORGetter.cli()

pyobo/sources/selventa/__init__.py CHANGED Viewed

File without changes

pyobo/sources/selventa/schem.py CHANGED Viewed

@@ -1,6 +1,8 @@
 """Selventa chemicals.
-.. seealso:: https://github.com/pyobo/pyobo/issues/27
+.. seealso::
+    https://github.com/pyobo/pyobo/issues/27
 """
 from collections.abc import Iterable
@@ -29,11 +31,6 @@ class SCHEMGetter(Obo):
         return iter_terms(force=force)
-def get_obo(*, force: bool = False) -> Obo:
-    """Get Selventa chemical as OBO."""
-    return SCHEMGetter(force=force)
 def iter_terms(force: bool = False) -> Iterable[Term]:
     """Iterate over selventa chemical terms."""
     df = ensure_df(PREFIX, url=URL, skiprows=8, force=force)
@@ -45,4 +42,4 @@ def iter_terms(force: bool = False) -> Iterable[Term]:
 if __name__ == "__main__":
-    get_obo().write_default(write_obo=True, force=True)
+    SCHEMGetter.cli()

pyobo/sources/selventa/scomp.py CHANGED Viewed

@@ -26,11 +26,6 @@ class SCOMPGetter(Obo):
         return iter_terms(force=force)
-def get_obo(*, force: bool = False) -> Obo:
-    """Get Selventa Complexes as OBO."""
-    return SCOMPGetter(force=force)
 def iter_terms(force: bool = False) -> Iterable[Term]:
     """Iterate over selventa complex terms."""
     df = ensure_df(PREFIX, url=URL, skiprows=9, force=force)
@@ -54,4 +49,4 @@ def iter_terms(force: bool = False) -> Iterable[Term]:
 if __name__ == "__main__":
-    get_obo().write_default(write_obo=True, force=True)
+    SCOMPGetter.cli()

pyobo/sources/selventa/sdis.py CHANGED Viewed

@@ -1,6 +1,8 @@
 """Selventa diseases.
-.. seealso:: https://github.com/pyobo/pyobo/issues/26
+.. seealso::
+    https://github.com/pyobo/pyobo/issues/26
 """
 from collections.abc import Iterable
@@ -29,11 +31,6 @@ class SDISGetter(Obo):
         return iter_terms(force=force)
-def get_obo(*, force: bool = False) -> Obo:
-    """Get Selventa Diseases as OBO."""
-    return SDISGetter(force=force)
 def iter_terms(force: bool = False) -> Iterable[Term]:
     """Iterate over selventa disease terms."""
     df = ensure_df(PREFIX, url=URL, skiprows=9, force=force)
@@ -48,4 +45,4 @@ def iter_terms(force: bool = False) -> Iterable[Term]:
 if __name__ == "__main__":
-    get_obo().write_default(write_obo=True, force=True)
+    SDISGetter.cli()

pyobo/sources/selventa/sfam.py CHANGED Viewed

@@ -26,11 +26,6 @@ class SFAMGetter(Obo):
         return iter_terms(force=force)
-def get_obo(*, force: bool = False) -> Obo:
-    """Get Selventa Families as OBO."""
-    return SFAMGetter(force=force)
 def iter_terms(force: bool = False) -> Iterable[Term]:
     """Iterate over selventa family terms."""
     df = ensure_df(PREFIX, url=URL, skiprows=9, force=force)
@@ -52,4 +47,4 @@ def iter_terms(force: bool = False) -> Iterable[Term]:
 if __name__ == "__main__":
-    get_obo().write_default(write_obo=True, force=True)
+    SFAMGetter.cli()

pyobo/sources/sgd.py CHANGED Viewed

@@ -3,8 +3,10 @@
 from collections.abc import Iterable
 from urllib.parse import unquote_plus
+from pystow.utils import read_tarfile_csv
 from ..struct import Obo, Reference, Synonym, Term, from_species
-from ..utils.path import ensure_tar_df
+from ..utils.path import ensure_path
 __all__ = [
     "SGDGetter",
@@ -31,24 +33,17 @@ class SGDGetter(Obo):
         yield from get_terms(self, force=force)
-def get_obo(force: bool = False) -> Obo:
-    """Get SGD as OBO."""
-    return SGDGetter(force=force)
 def get_terms(ontology: Obo, force: bool = False) -> Iterable[Term]:
     """Get SGD terms."""
-    df = ensure_tar_df(
-        prefix=PREFIX,
-        url=URL,
+    path = ensure_path(PREFIX, url=URL, version=ontology._version_or_raise, force=force)
+    df = read_tarfile_csv(
+        path,
         inner_path=INNER_PATH,
         sep="\t",
         skiprows=18,
         header=None,
         names=HEADER,
-        force=force,
         dtype=str,
-        version=ontology._version_or_raise,
     )
     df = df[df["feature"] == "gene"]
     for data in df["data"]:

pyobo/sources/signor/__init__.py ADDED Viewed

@@ -0,0 +1,7 @@
+"""Sources from `SIGNOR <https://signor.uniroma2.it/>`_."""
+from .signor_complexes import SignorGetter
+__all__ = [
+    "SignorGetter",
+]

pyobo 0.11.2__py3-none-any.whl → 0.12.0__py3-none-any.whl

pyobo 0.11.2__py3-none-any.whl → 0.12.0__py3-none-any.whl