PyPI - pyobo - Versions diffs - 0.11.2__py3-none-any.whl → 0.12.0__py3-none-any.whl - Mend

pyobo 0.11.2__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (227) hide show

pyobo/.DS_Store +0 -0
pyobo/__init__.py +95 -20
pyobo/__main__.py +0 -0
pyobo/api/__init__.py +81 -10
pyobo/api/alts.py +52 -42
pyobo/api/combine.py +39 -0
pyobo/api/edges.py +68 -0
pyobo/api/hierarchy.py +231 -203
pyobo/api/metadata.py +14 -19
pyobo/api/names.py +207 -127
pyobo/api/properties.py +117 -113
pyobo/api/relations.py +68 -94
pyobo/api/species.py +24 -21
pyobo/api/typedefs.py +11 -11
pyobo/api/utils.py +66 -13
pyobo/api/xrefs.py +108 -114
pyobo/cli/__init__.py +0 -0
pyobo/cli/cli.py +35 -50
pyobo/cli/database.py +183 -161
pyobo/{xrefdb/xrefs_pipeline.py → cli/database_utils.py} +54 -73
pyobo/cli/lookup.py +163 -195
pyobo/cli/utils.py +19 -6
pyobo/constants.py +102 -3
pyobo/getters.py +196 -118
pyobo/gilda_utils.py +79 -200
pyobo/identifier_utils/__init__.py +41 -0
pyobo/identifier_utils/api.py +296 -0
pyobo/identifier_utils/model.py +130 -0
pyobo/identifier_utils/preprocessing.json +812 -0
pyobo/identifier_utils/preprocessing.py +61 -0
pyobo/identifier_utils/relations/__init__.py +8 -0
pyobo/identifier_utils/relations/api.py +162 -0
pyobo/identifier_utils/relations/data.json +5824 -0
pyobo/identifier_utils/relations/data_owl.json +57 -0
pyobo/identifier_utils/relations/data_rdf.json +1 -0
pyobo/identifier_utils/relations/data_rdfs.json +7 -0
pyobo/mocks.py +9 -6
pyobo/ner/__init__.py +9 -0
pyobo/ner/api.py +72 -0
pyobo/ner/normalizer.py +33 -0
pyobo/obographs.py +43 -39
pyobo/plugins.py +5 -4
pyobo/py.typed +0 -0
pyobo/reader.py +1358 -395
pyobo/reader_utils.py +155 -0
pyobo/resource_utils.py +42 -22
pyobo/resources/__init__.py +0 -0
pyobo/resources/goc.py +75 -0
pyobo/resources/goc.tsv +188 -0
pyobo/resources/ncbitaxon.py +4 -5
pyobo/resources/ncbitaxon.tsv.gz +0 -0
pyobo/resources/ro.py +3 -2
pyobo/resources/ro.tsv +0 -0
pyobo/resources/so.py +0 -0
pyobo/resources/so.tsv +0 -0
pyobo/sources/README.md +12 -8
pyobo/sources/__init__.py +52 -29
pyobo/sources/agrovoc.py +0 -0
pyobo/sources/antibodyregistry.py +11 -12
pyobo/sources/bigg/__init__.py +13 -0
pyobo/sources/bigg/bigg_compartment.py +81 -0
pyobo/sources/bigg/bigg_metabolite.py +229 -0
pyobo/sources/bigg/bigg_model.py +46 -0
pyobo/sources/bigg/bigg_reaction.py +77 -0
pyobo/sources/biogrid.py +1 -2
pyobo/sources/ccle.py +7 -12
pyobo/sources/cgnc.py +0 -5
pyobo/sources/chebi.py +1 -1
pyobo/sources/chembl/__init__.py +9 -0
pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
pyobo/sources/chembl/chembl_target.py +160 -0
pyobo/sources/civic_gene.py +55 -15
pyobo/sources/clinicaltrials.py +160 -0
pyobo/sources/complexportal.py +24 -24
pyobo/sources/conso.py +14 -22
pyobo/sources/cpt.py +0 -0
pyobo/sources/credit.py +1 -9
pyobo/sources/cvx.py +27 -5
pyobo/sources/depmap.py +9 -12
pyobo/sources/dictybase_gene.py +2 -7
pyobo/sources/drugbank/__init__.py +9 -0
pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
pyobo/sources/drugcentral.py +17 -13
pyobo/sources/expasy.py +31 -34
pyobo/sources/famplex.py +13 -18
pyobo/sources/flybase.py +3 -8
pyobo/sources/gard.py +62 -0
pyobo/sources/geonames/__init__.py +9 -0
pyobo/sources/geonames/features.py +28 -0
pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
pyobo/sources/geonames/utils.py +115 -0
pyobo/sources/gmt_utils.py +6 -7
pyobo/sources/go.py +20 -13
pyobo/sources/gtdb.py +154 -0
pyobo/sources/gwascentral/__init__.py +9 -0
pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
pyobo/sources/hgnc/__init__.py +9 -0
pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
pyobo/sources/icd/__init__.py +9 -0
pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
pyobo/sources/icd/icd11.py +148 -0
pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
pyobo/sources/interpro.py +4 -9
pyobo/sources/itis.py +0 -5
pyobo/sources/kegg/__init__.py +0 -0
pyobo/sources/kegg/api.py +16 -38
pyobo/sources/kegg/genes.py +9 -20
pyobo/sources/kegg/genome.py +1 -7
pyobo/sources/kegg/pathway.py +9 -21
pyobo/sources/mesh.py +58 -24
pyobo/sources/mgi.py +3 -10
pyobo/sources/mirbase/__init__.py +11 -0
pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
pyobo/sources/msigdb.py +74 -39
pyobo/sources/ncbi/__init__.py +9 -0
pyobo/sources/ncbi/ncbi_gc.py +162 -0
pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
pyobo/sources/nih_reporter.py +60 -0
pyobo/sources/nlm/__init__.py +9 -0
pyobo/sources/nlm/nlm_catalog.py +48 -0
pyobo/sources/nlm/nlm_publisher.py +36 -0
pyobo/sources/nlm/utils.py +116 -0
pyobo/sources/npass.py +6 -8
pyobo/sources/omim_ps.py +10 -3
pyobo/sources/pathbank.py +4 -8
pyobo/sources/pfam/__init__.py +9 -0
pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
pyobo/sources/pharmgkb/__init__.py +15 -0
pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
pyobo/sources/pharmgkb/utils.py +86 -0
pyobo/sources/pid.py +1 -6
pyobo/sources/pombase.py +6 -10
pyobo/sources/pubchem.py +4 -9
pyobo/sources/reactome.py +5 -11
pyobo/sources/rgd.py +11 -16
pyobo/sources/rhea.py +37 -36
pyobo/sources/ror.py +69 -42
pyobo/sources/selventa/__init__.py +0 -0
pyobo/sources/selventa/schem.py +4 -7
pyobo/sources/selventa/scomp.py +1 -6
pyobo/sources/selventa/sdis.py +4 -7
pyobo/sources/selventa/sfam.py +1 -6
pyobo/sources/sgd.py +6 -11
pyobo/sources/signor/__init__.py +7 -0
pyobo/sources/signor/download.py +41 -0
pyobo/sources/signor/signor_complexes.py +105 -0
pyobo/sources/slm.py +12 -15
pyobo/sources/umls/__init__.py +7 -1
pyobo/sources/umls/__main__.py +0 -0
pyobo/sources/umls/get_synonym_types.py +20 -4
pyobo/sources/umls/sty.py +57 -0
pyobo/sources/umls/synonym_types.tsv +1 -1
pyobo/sources/umls/umls.py +18 -22
pyobo/sources/unimod.py +46 -0
pyobo/sources/uniprot/__init__.py +1 -1
pyobo/sources/uniprot/uniprot.py +40 -32
pyobo/sources/uniprot/uniprot_ptm.py +4 -34
pyobo/sources/utils.py +3 -2
pyobo/sources/wikipathways.py +7 -10
pyobo/sources/zfin.py +5 -10
pyobo/ssg/__init__.py +12 -16
pyobo/ssg/base.html +0 -0
pyobo/ssg/index.html +26 -13
pyobo/ssg/term.html +12 -2
pyobo/ssg/typedef.html +0 -0
pyobo/struct/__init__.py +54 -8
pyobo/struct/functional/__init__.py +1 -0
pyobo/struct/functional/dsl.py +2572 -0
pyobo/struct/functional/macros.py +423 -0
pyobo/struct/functional/obo_to_functional.py +385 -0
pyobo/struct/functional/ontology.py +270 -0
pyobo/struct/functional/utils.py +112 -0
pyobo/struct/reference.py +331 -136
pyobo/struct/struct.py +1413 -643
pyobo/struct/struct_utils.py +1078 -0
pyobo/struct/typedef.py +162 -210
pyobo/struct/utils.py +12 -5
pyobo/struct/vocabulary.py +138 -0
pyobo/utils/__init__.py +0 -0
pyobo/utils/cache.py +13 -11
pyobo/utils/io.py +17 -31
pyobo/utils/iter.py +5 -5
pyobo/utils/misc.py +41 -53
pyobo/utils/ndex_utils.py +0 -0
pyobo/utils/path.py +76 -70
pyobo/version.py +3 -3
{pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/METADATA +228 -229
pyobo-0.12.0.dist-info/RECORD +202 -0
pyobo-0.12.0.dist-info/WHEEL +4 -0
{pyobo-0.11.2.dist-info → pyobo-0.12.0.dist-info}/entry_points.txt +1 -0
pyobo-0.12.0.dist-info/licenses/LICENSE +21 -0
pyobo/aws.py +0 -162
pyobo/cli/aws.py +0 -47
pyobo/identifier_utils.py +0 -142
pyobo/normalizer.py +0 -232
pyobo/registries/__init__.py +0 -16
pyobo/registries/metaregistry.json +0 -507
pyobo/registries/metaregistry.py +0 -135
pyobo/sources/icd11.py +0 -105
pyobo/xrefdb/__init__.py +0 -1
pyobo/xrefdb/canonicalizer.py +0 -214
pyobo/xrefdb/priority.py +0 -59
pyobo/xrefdb/sources/__init__.py +0 -60
pyobo/xrefdb/sources/biomappings.py +0 -36
pyobo/xrefdb/sources/cbms2019.py +0 -91
pyobo/xrefdb/sources/chembl.py +0 -83
pyobo/xrefdb/sources/compath.py +0 -82
pyobo/xrefdb/sources/famplex.py +0 -64
pyobo/xrefdb/sources/gilda.py +0 -50
pyobo/xrefdb/sources/intact.py +0 -113
pyobo/xrefdb/sources/ncit.py +0 -133
pyobo/xrefdb/sources/pubchem.py +0 -27
pyobo/xrefdb/sources/wikidata.py +0 -116
pyobo-0.11.2.dist-info/RECORD +0 -157
pyobo-0.11.2.dist-info/WHEEL +0 -5
pyobo-0.11.2.dist-info/top_level.txt +0 -1

pyobo/sources/expasy.py CHANGED Viewed

@@ -4,18 +4,18 @@ import logging
 import re
 from collections import defaultdict
 from collections.abc import Iterable, Mapping
-from typing import Any, Optional
+from typing import Any
 from .utils import get_go_mapping
-from ..struct import Obo, Reference, Synonym, Term
-from ..struct.typedef import enables, has_member, term_replaced_by
+from ..struct import Annotation, Obo, OBOLiteral, Reference, Synonym, Term
+from ..struct.typedef import enables, has_member, has_source, term_replaced_by
 from ..utils.path import ensure_path
 __all__ = [
     "ExpasyGetter",
 ]
-PREFIX = "eccode"
+PREFIX = "ec"
 EXPASY_DATABASE_URL = "ftp://ftp.expasy.org/databases/enzyme/enzyme.dat"
 EXPASY_TREE_URL = "ftp://ftp.expasy.org/databases/enzyme/enzclass.txt"
@@ -43,33 +43,23 @@ class ExpasyGetter(Obo):
     """A getter for ExPASy Enzyme Classes."""
     bioversions_key = ontology = PREFIX
-    typedefs = [has_member, enables, term_replaced_by]
+    typedefs = [has_member, enables, term_replaced_by, has_source]
     root_terms = [
-        Reference(prefix="eccode", identifier="1"),
-        Reference(prefix="eccode", identifier="2"),
-        Reference(prefix="eccode", identifier="3"),
-        Reference(prefix="eccode", identifier="4"),
-        Reference(prefix="eccode", identifier="5"),
-        Reference(prefix="eccode", identifier="6"),
-        Reference(prefix="eccode", identifier="7"),
+        Reference(prefix=PREFIX, identifier="1"),
+        Reference(prefix=PREFIX, identifier="2"),
+        Reference(prefix=PREFIX, identifier="3"),
+        Reference(prefix=PREFIX, identifier="4"),
+        Reference(prefix=PREFIX, identifier="5"),
+        Reference(prefix=PREFIX, identifier="6"),
+        Reference(prefix=PREFIX, identifier="7"),
     ]
-    idspaces = {
-        "uniprot": "https://bioregistry.io/uniprot:",
-        "eccode": "https://bioregistry.io/eccode:",
-        "GO": "http://purl.obolibrary.org/obo/GO_",
-        "RO": "http://purl.obolibrary.org/obo/RO_",
-    }
+    property_values = [Annotation(has_source.reference, OBOLiteral.uri(EXPASY_DATABASE_URL))]
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
         """Iterate over terms in the ontology."""
         return get_terms(version=self._version_or_raise, force=force)
-def get_obo(force: bool = False) -> Obo:
-    """Get ExPASy as OBO."""
-    return ExpasyGetter(force=force)
 def get_terms(version: str, force: bool = False) -> Iterable[Term]:
     """Get the ExPASy terms."""
     tree_path = ensure_path(PREFIX, url=EXPASY_TREE_URL, version=version, force=force)
@@ -111,9 +101,7 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]:
                 reference=Reference(prefix=PREFIX, identifier=ec_code), is_obsolete=True
             )
             for transfer_id in transfer_ids:
-                term.append_relationship(
-                    term_replaced_by, Reference(prefix=PREFIX, identifier=transfer_id)
-                )
+                term.append_replaced_by(Reference(prefix=PREFIX, identifier=transfer_id))
             continue
         parent_ec_code = data["parent"]["identifier"]
@@ -142,16 +130,17 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]:
             reference=Reference(prefix=PREFIX, identifier=ec_code, name=name),
             parents=[parent_term.reference],
             synonyms=synonyms,
+            definition=data.get("reaction"),
         )
         for domain in data.get("domains", []):
-            term.append_relationship(
+            term.annotate_object(
                 has_member,
                 Reference.model_validate(
                     {"prefix": domain["namespace"], "identifier": domain["identifier"]},
                 ),
             )
         for protein in data.get("proteins", []):
-            term.append_relationship(
+            term.annotate_object(
                 has_member,
                 Reference(
                     prefix=protein["namespace"],
@@ -167,18 +156,16 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]:
     return terms.values()
-"""TREE"""
 def normalize_expasy_id(expasy_id: str) -> str:
     """Return a standardized ExPASy identifier string.
     :param expasy_id: A possibly non-normalized ExPASy identifier
+    :return: A normalized string.
     """
     return expasy_id.replace(" ", "")
-def give_edge(unnormalized_ec_code: str) -> tuple[int, Optional[str], str]:
+def give_edge(unnormalized_ec_code: str) -> tuple[int, str | None, str]:
     """Return a (parent, child) tuple for given id."""
     levels = [x for x in unnormalized_ec_code.replace(" ", "").replace("-", "").split(".") if x]
     level = len(levels)
@@ -220,10 +207,11 @@ def get_tree(lines: Iterable[str]):
     return rv
-def get_database(lines: Iterable[str]) -> Mapping:
+def get_database(lines: Iterable[str]) -> Mapping[str, dict[str, Any]]:
     """Parse the ExPASy database file and returns a list of enzyme entry dictionaries.
     :param lines: An iterator over the ExPASy database file or file-like
+    :returns: A mapping from EC code to data
     """
     rv = {}
     for groups in _group_by_id(lines):
@@ -256,7 +244,13 @@ def get_database(lines: Iterable[str]) -> Mapping:
                 value = value.strip().removesuffix("and").rstrip(",").strip()
                 ec_data_entry["transfer_id"] = _parse_transfer(value)
             elif descriptor == DE:
-                ec_data_entry["concept"]["name"] = value.rstrip(".")  # type:ignore
+                if "name" not in ec_data_entry["concept"]:
+                    ec_data_entry["concept"]["name"] = ""
+                ec_data_entry["concept"]["name"] += value.rstrip(".")  # type:ignore
+            elif descriptor == CA:
+                if "reaction" not in ec_data_entry:
+                    ec_data_entry["reaction"] = ""
+                ec_data_entry["reaction"] += value.rstrip(".")  # type:ignore
             elif descriptor == AN:
                 ec_data_entry["synonyms"].append(value.rstrip("."))  # type:ignore
             elif descriptor == PR:
@@ -290,6 +284,9 @@ TRANSFER_SPLIT_RE = re.compile(r",\s*|\s+and\s+")
 def _parse_transfer(value: str) -> list[str]:
     """Parse transferred entry string.
+    :param value: A string for a transferred entry
+    :returns: A list of EC codes that it got transferred to
     >>> _parse_transfer("Transferred entry: 1.1.1.198, 1.1.1.227 and 1.1.1.228.")
     ['1.1.1.198', '1.1.1.227', '1.1.1.228']
     """

pyobo/sources/famplex.py CHANGED Viewed

@@ -8,8 +8,8 @@ import bioregistry
 from pystow.utils import get_commit
 from pyobo import get_name_id_mapping
-from pyobo.struct import Obo, Reference, Term
-from pyobo.struct.typedef import has_member, has_part, is_a, part_of
+from pyobo.struct import Obo, Reference, Term, _parse_str_or_curie_or_uri
+from pyobo.struct.typedef import has_citation, has_member, has_part, is_a, part_of
 from pyobo.utils.io import multidict
 from pyobo.utils.path import ensure_df
@@ -23,7 +23,7 @@ class FamPlexGetter(Obo):
     ontology = PREFIX
     dynamic_version = True
-    typedefs = [has_member, has_part, is_a, part_of]
+    typedefs = [has_member, has_part, is_a, part_of, has_citation]
     def _get_version(self) -> str:
         return get_commit("sorgerlab", "famplex")
@@ -33,11 +33,6 @@ class FamPlexGetter(Obo):
         return get_terms(force=force, version=self._version_or_raise)
-def get_obo(force: bool = False) -> Obo:
-    """Get FamPlex as OBO."""
-    return FamPlexGetter(force=force)
 def get_terms(version: str, force: bool = False) -> Iterable[Term]:
     """Get the FamPlex terms."""
     base_url = f"https://raw.githubusercontent.com/sorgerlab/famplex/{version}"
@@ -106,33 +101,33 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]:
     for (entity,) in entities_df.values:
         reference = Reference(prefix=PREFIX, identifier=entity, name=entity)
         definition, provenance = id_to_definition.get(entity, (None, None))
-        provenance_reference = (
-            Reference.from_curie(provenance) if isinstance(provenance, str) else None
-        )
         term = Term(
             reference=reference,
             definition=definition,
-            provenance=[] if provenance_reference is None else [provenance_reference],
         )
+        provenance_reference = (
+            _parse_str_or_curie_or_uri(provenance) if isinstance(provenance, str) else None
+        )
+        if provenance_reference:
+            term.append_provenance(provenance_reference)
         for xref_reference in id_xrefs.get(entity, []):
             term.append_xref(xref_reference)
         for r, t in out_edges.get(reference, []):
-            if r == "isa" and t.prefix == "fplx":
+            if r == "isa":
                 term.append_parent(t)
-            elif r == "isa":
-                term.append_relationship(is_a, t)
             elif r == "partof":
-                term.append_relationship(part_of, t)
+                term.annotate_object(part_of, t)
             else:
                 logging.warning("unhandled relation %s", r)
         for r, h in in_edges.get(reference, []):
             if r == "isa":
-                term.append_relationship(has_member, h)
+                term.annotate_object(has_member, h)
             elif r == "partof":
-                term.append_relationship(has_part, h)
+                term.annotate_object(has_part, h)
             else:
                 logging.warning("unhandled relation %s", r)
         yield term

pyobo/sources/flybase.py CHANGED Viewed

@@ -8,7 +8,7 @@ from tqdm.auto import tqdm
 from pyobo import Reference
 from pyobo.resources.so import get_so_name
-from pyobo.struct import Obo, Term, from_species, orthologous
+from pyobo.struct import Obo, Term, _parse_str_or_curie_or_uri, from_species, orthologous
 from pyobo.utils.io import multisetdict
 from pyobo.utils.path import ensure_df
@@ -91,11 +91,6 @@ def _get_synonyms(version, force):
     return df  # TODO use this
-def get_obo(force: bool = False) -> Obo:
-    """Get OBO."""
-    return FlyBaseGetter(force=force)
 GTYPE_TO_SO = {
     "SRP_RNA_gene": "0001269",
     "protein_coding_gene": "0001217",
@@ -154,11 +149,11 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]:
         for hgnc_curie in human_orthologs.get(identifier, []):
             if not hgnc_curie or pd.isna(hgnc_curie):
                 continue
-            hgnc_ortholog = Reference.from_curie(hgnc_curie)
+            hgnc_ortholog = _parse_str_or_curie_or_uri(hgnc_curie)
             if hgnc_ortholog is None:
                 tqdm.write(f"[{PREFIX}] {identifier} had invalid ortholog: {hgnc_curie}")
             else:
-                term.append_relationship(orthologous, hgnc_ortholog)
+                term.annotate_object(orthologous, hgnc_ortholog)
         taxonomy_id = abbr_to_taxonomy.get(organism)
         if taxonomy_id is not None:
             term.set_species(taxonomy_id)

pyobo/sources/gard.py ADDED Viewed

@@ -0,0 +1,62 @@
+"""Converter for GARD."""
+from collections.abc import Iterable
+import requests
+from pyobo.struct import Obo, Term, default_reference
+__all__ = [
+    "GARDGetter",
+]
+PREFIX = "gard"
+PP = "gard.category"
+URL = "https://rarediseases.info.nih.gov/assets/diseases.trimmed.json"
+class GARDGetter(Obo):
+    """An ontology representation of GARD."""
+    bioversions_key = ontology = PREFIX
+    dynamic_version = True
+    def iter_terms(self, force: bool = False) -> Iterable[Term]:
+        """Iterate over gene terms for GARD."""
+        yield from get_terms()
+def get_terms() -> Iterable[Term]:
+    """Get GARD terms."""
+    rows = requests.get(URL, timeout=5).json()
+    categories = {
+        category: default_reference(
+            prefix=PREFIX, identifier=category.lower().replace(" ", "_"), name=category
+        )
+        for row in rows
+        for category in row.get("diseaseCategories", [])
+    }
+    categories["uncategorized"] = default_reference(
+        prefix=PREFIX, identifier="uncategorized", name="Uncategorized Disease"
+    )
+    for category_reference in categories.values():
+        yield Term(reference=category_reference)
+    for row in rows:
+        term = Term.from_triple(PREFIX, identifier=str(row.pop("id")), name=row.pop("name"))
+        _name = row.pop("encodedName", None)
+        for synonym in row.pop("synonyms", []):
+            synonym = synonym.strip()
+            if synonym:
+                term.append_synonym(synonym)
+        for category in row.pop("diseaseCategories", ["uncategorized"]):
+            term.append_parent(categories[category])
+        _spanish_id = row.pop("spanishId", None)
+        _spanish_name = row.pop("spanishName", None)
+        yield term
+if __name__ == "__main__":
+    GARDGetter().cli()

pyobo/sources/geonames/__init__.py ADDED Viewed

@@ -0,0 +1,9 @@
+"""Sources from GeoNames."""
+from .features import GeonamesFeatureGetter
+from .geonames import GeonamesGetter
+__all__ = [
+    "GeonamesFeatureGetter",
+    "GeonamesGetter",
+]

pyobo/sources/geonames/features.py ADDED Viewed

@@ -0,0 +1,28 @@
+"""Get terms from GeoNames Features."""
+from __future__ import annotations
+import logging
+from collections.abc import Iterable
+from pyobo import Obo, Term
+from pyobo.sources.geonames.utils import PREFIX_FEATURE, get_feature_terms
+__all__ = ["GeonamesFeatureGetter"]
+logger = logging.getLogger(__name__)
+class GeonamesFeatureGetter(Obo):
+    """An ontology representation of GeoNames features."""
+    ontology = PREFIX_FEATURE
+    dynamic_version = True
+    def iter_terms(self, force: bool = False) -> Iterable[Term]:
+        """Iterate over terms in the ontology."""
+        yield from get_feature_terms(force=force)
+if __name__ == "__main__":
+    GeonamesFeatureGetter.cli()

pyobo/sources/{geonames.py → geonames/geonames.py} RENAMED Viewed

@@ -3,53 +3,81 @@
 from __future__ import annotations
 import logging
-from collections.abc import Collection, Iterable, Mapping
+from collections.abc import Iterable, Mapping
 import pandas as pd
 from pystow.utils import read_zipfile_csv
 from tqdm import tqdm
 from pyobo import Obo, Term
-from pyobo.struct import Reference, part_of
+from pyobo.sources.geonames.utils import (
+    ADMIN1_URL,
+    ADMIN2_URL,
+    ADMIN_1,
+    ADMIN_2,
+    CITIES_URL,
+    CITY,
+    CODE_TYPEDEF,
+    COUNTRIES_URL,
+    FEATURE_TERM,
+    NATION,
+    P_CATEGORY,
+    PREFIX,
+    PREFIX_FEATURE,
+    SYNONYMS_DF_COLUMNS,
+    SYNONYMS_URL,
+    get_feature_terms,
+)
+from pyobo.struct import Reference, has_part, part_of
 from pyobo.utils.path import ensure_df, ensure_path
 __all__ = ["GeonamesGetter"]
 logger = logging.getLogger(__name__)
-PREFIX = "geonames"
-COUNTRIES_URL = "https://download.geonames.org/export/dump/countryInfo.txt"
-ADMIN1_URL = "https://download.geonames.org/export/dump/admin1CodesASCII.txt"
-ADMIN2_URL = "https://download.geonames.org/export/dump/admin2Codes.txt"
-CITIES_URL = "https://download.geonames.org/export/dump/cities15000.zip"
 class GeonamesGetter(Obo):
     """An ontology representation of GeoNames."""
     ontology = PREFIX
     dynamic_version = True
-    typedefs = [part_of]
+    typedefs = [part_of, CODE_TYPEDEF, has_part]
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
         """Iterate over terms in the ontology."""
         return get_terms(force=force)
-def get_terms(*, force: bool = False) -> Collection[Term]:
+def get_terms(*, force: bool = False) -> Iterable[Term]:
     """Get terms."""
+    yield Term(reference=NATION)
+    yield Term(reference=ADMIN_1).append_relationship(part_of, NATION)
+    yield Term(reference=ADMIN_2).append_relationship(part_of, ADMIN_1)
+    yield Term(reference=CITY)
+    # since the output here is only cities, we can slice this down
+    for term in get_feature_terms(force=force):
+        if term.identifier.startswith("P.") or term.pair == P_CATEGORY.pair or term == FEATURE_TERM:
+            yield term
     code_to_country = get_code_to_country(force=force)
+    yield from code_to_country.values()
     code_to_admin1 = get_code_to_admin1(code_to_country, force=force)
+    yield from code_to_admin1.values()
     code_to_admin2 = get_code_to_admin2(
         code_to_country=code_to_country, code_to_admin1=code_to_admin1, force=force
     )
+    yield from code_to_admin2.values()
     id_to_term = get_cities(
         code_to_country=code_to_country,
         code_to_admin1=code_to_admin1,
         code_to_admin2=code_to_admin2,
         force=force,
     )
-    return id_to_term.values()
+    yield from list(id_to_term.values())
 def get_code_to_country(*, force: bool = False) -> Mapping[str, Term]:
@@ -70,9 +98,13 @@ def get_code_to_country(*, force: bool = False) -> Mapping[str, Term]:
     for identifier, name, code, fips, iso3 in countries_df[cols].values:
         if pd.isna(code):
             continue
-        term = Term.from_triple(
-            "geonames", identifier, name if pd.notna(name) else None, type="Instance"
+        term = Term(
+            reference=Reference(
+                prefix=PREFIX, identifier=identifier, name=name if pd.notna(name) else None
+            ),
+            type="Instance",
         )
+        term.append_parent(NATION)
         term.append_synonym(code)
         if name.startswith("The "):
             term.append_synonym(name.removeprefix("The "))
@@ -80,7 +112,7 @@ def get_code_to_country(*, force: bool = False) -> Mapping[str, Term]:
             term.append_synonym(fips)
         if pd.notna(iso3):
             term.append_synonym(iso3)
-        term.append_property("code", code)
+        term.annotate_string(CODE_TYPEDEF, code)
         code_to_country[code] = term
     logger.info(f"got {len(code_to_country):,} country records")
     return code_to_country
@@ -104,10 +136,14 @@ def get_code_to_admin1(
             tqdm.write(f"Missing info for  {name} / {asciiname} / {code=} / {identifier=}")
             continue
-        term = Term.from_triple(
-            "geonames", identifier, name if pd.notna(name) else None, type="Instance"
+        term = Term(
+            reference=Reference(
+                prefix=PREFIX, identifier=identifier, name=name if pd.notna(name) else None
+            ),
+            type="Instance",
         )
-        term.append_property("code", code)
+        term.append_parent(ADMIN_1)
+        term.annotate_string(CODE_TYPEDEF, code)
         code_to_admin1[code] = term
         country_code = code.split(".")[0]
@@ -132,10 +168,14 @@ def get_code_to_admin2(
     for identifier, name, code in admin2_df[["geonames_id", "name", "code"]].values:
         if pd.isna(identifier) or pd.isna(code):
             continue
-        term = Term.from_triple(
-            "geonames", identifier, name if pd.notna(name) else None, type="Instance"
+        term = Term(
+            reference=Reference(
+                prefix=PREFIX, identifier=identifier, name=name if pd.notna(name) else None
+            ),
+            type="Instance",
         )
-        term.append_property("code", code)
+        term.append_parent(ADMIN_2)
+        term.annotate_string(CODE_TYPEDEF, code)
         code_to_admin2[code] = term
         admin1_code = code.rsplit(".", 1)[0]
         admin1_term = code_to_admin1.get(admin1_code)
@@ -181,6 +221,19 @@ def _get_cities_df(force: bool = False) -> pd.DataFrame:
     return cities_df
+def _get_synonyms_df(force: bool = False) -> pd.DataFrame:
+    """Get the synonyms dataframe."""
+    path = ensure_path(PREFIX, url=SYNONYMS_URL, force=force)
+    synonyms_df = read_zipfile_csv(
+        path=path,
+        inner_path="alternateNamesV2.txt",
+        header=None,
+        names=SYNONYMS_DF_COLUMNS,
+        dtype=str,
+    )
+    return synonyms_df
 def get_cities(
     code_to_country,
     code_to_admin1,
@@ -188,7 +241,8 @@ def get_cities(
     *,
     minimum_population: int = 100_000,
     force: bool = False,
-) -> Mapping[str, Term]:
+    include_synonyms: bool = False,
+) -> dict[str, Term]:
     """Get a mapping from city code to term."""
     cities_df = _get_cities_df(force=force)
     cities_df = cities_df[cities_df.population.astype(int) > minimum_population]
@@ -200,11 +254,18 @@ def get_cities(
     cols = ["geonames_id", "name", "synonyms", "country_code", "admin1", "admin2", "feature_code"]
     for identifier, name, synonyms, country, admin1, admin2, feature_code in cities_df[cols].values:
-        terms[identifier] = term = Term.from_triple(
-            "geonames", identifier, name if pd.notna(name) else None, type="Instance"
+        terms[identifier] = term = Term(
+            reference=Reference(
+                prefix=PREFIX, identifier=identifier, name=name if pd.notna(name) else None
+            ),
+            type="Instance",
         )
-        term.append_parent(Reference(prefix="geonames.feature", identifier=feature_code))
-        if synonyms and not isinstance(synonyms, float):
+        # All cities are under the P branch, but the prefix is omitted for brevity in the TSV
+        term.append_parent(Reference(prefix=PREFIX_FEATURE, identifier=f"P.{feature_code}"))
+        term.append_parent(CITY)
+        if include_synonyms and synonyms and not isinstance(synonyms, float):
+            # TODO include language codes
             for synonym in synonyms:
                 if pd.notna(synonym):
                     term.append_synonym(synonym)
@@ -254,4 +315,4 @@ def get_city_to_country() -> dict[str, str]:
 if __name__ == "__main__":
-    GeonamesGetter().write_default(write_obo=True, force=True)
+    GeonamesGetter.cli()

pyobo 0.11.2__py3-none-any.whl → 0.12.0__py3-none-any.whl

pyobo 0.11.2__py3-none-any.whl → 0.12.0__py3-none-any.whl