PyPI - pyobo - Versions diffs - 0.11.1__py3-none-any.whl → 0.12.0__py3-none-any.whl - Mend

pyobo 0.11.1__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (242)

pyobo/.DS_Store +0 -0
pyobo/__init__.py +95 -20
pyobo/__main__.py +0 -0
pyobo/api/__init__.py +81 -10
pyobo/api/alts.py +52 -42
pyobo/api/combine.py +39 -0
pyobo/api/edges.py +68 -0
pyobo/api/hierarchy.py +231 -203
pyobo/api/metadata.py +14 -19
pyobo/api/names.py +207 -127
pyobo/api/properties.py +117 -113
pyobo/api/relations.py +68 -94
pyobo/api/species.py +24 -21
pyobo/api/typedefs.py +11 -11
pyobo/api/utils.py +66 -13
pyobo/api/xrefs.py +108 -114
pyobo/cli/__init__.py +0 -0
pyobo/cli/cli.py +35 -50
pyobo/cli/database.py +183 -161
pyobo/{xrefdb/xrefs_pipeline.py → cli/database_utils.py} +54 -73
pyobo/cli/lookup.py +163 -195
pyobo/cli/utils.py +19 -6
pyobo/constants.py +102 -3
pyobo/getters.py +196 -118
pyobo/gilda_utils.py +79 -200
pyobo/identifier_utils/__init__.py +41 -0
pyobo/identifier_utils/api.py +296 -0
pyobo/identifier_utils/model.py +130 -0
pyobo/identifier_utils/preprocessing.json +812 -0
pyobo/identifier_utils/preprocessing.py +61 -0
pyobo/identifier_utils/relations/__init__.py +8 -0
pyobo/identifier_utils/relations/api.py +162 -0
pyobo/identifier_utils/relations/data.json +5824 -0
pyobo/identifier_utils/relations/data_owl.json +57 -0
pyobo/identifier_utils/relations/data_rdf.json +1 -0
pyobo/identifier_utils/relations/data_rdfs.json +7 -0
pyobo/mocks.py +9 -6
pyobo/ner/__init__.py +9 -0
pyobo/ner/api.py +72 -0
pyobo/ner/normalizer.py +33 -0
pyobo/obographs.py +43 -39
pyobo/plugins.py +5 -4
pyobo/py.typed +0 -0
pyobo/reader.py +1358 -395
pyobo/reader_utils.py +155 -0
pyobo/resource_utils.py +42 -22
pyobo/resources/__init__.py +0 -0
pyobo/resources/goc.py +75 -0
pyobo/resources/goc.tsv +188 -0
pyobo/resources/ncbitaxon.py +4 -5
pyobo/resources/ncbitaxon.tsv.gz +0 -0
pyobo/resources/ro.py +3 -2
pyobo/resources/ro.tsv +0 -0
pyobo/resources/so.py +0 -0
pyobo/resources/so.tsv +0 -0
pyobo/sources/README.md +12 -8
pyobo/sources/__init__.py +52 -29
pyobo/sources/agrovoc.py +0 -0
pyobo/sources/antibodyregistry.py +11 -12
pyobo/sources/bigg/__init__.py +13 -0
pyobo/sources/bigg/bigg_compartment.py +81 -0
pyobo/sources/bigg/bigg_metabolite.py +229 -0
pyobo/sources/bigg/bigg_model.py +46 -0
pyobo/sources/bigg/bigg_reaction.py +77 -0
pyobo/sources/biogrid.py +1 -2
pyobo/sources/ccle.py +7 -12
pyobo/sources/cgnc.py +0 -5
pyobo/sources/chebi.py +1 -1
pyobo/sources/chembl/__init__.py +9 -0
pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
pyobo/sources/chembl/chembl_target.py +160 -0
pyobo/sources/civic_gene.py +55 -15
pyobo/sources/clinicaltrials.py +160 -0
pyobo/sources/complexportal.py +24 -24
pyobo/sources/conso.py +14 -22
pyobo/sources/cpt.py +0 -0
pyobo/sources/credit.py +1 -9
pyobo/sources/cvx.py +27 -5
pyobo/sources/depmap.py +9 -12
pyobo/sources/dictybase_gene.py +2 -7
pyobo/sources/drugbank/__init__.py +9 -0
pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
pyobo/sources/drugcentral.py +17 -13
pyobo/sources/expasy.py +31 -34
pyobo/sources/famplex.py +13 -18
pyobo/sources/flybase.py +3 -8
pyobo/sources/gard.py +62 -0
pyobo/sources/geonames/__init__.py +9 -0
pyobo/sources/geonames/features.py +28 -0
pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
pyobo/sources/geonames/utils.py +115 -0
pyobo/sources/gmt_utils.py +6 -7
pyobo/sources/go.py +20 -13
pyobo/sources/gtdb.py +154 -0
pyobo/sources/gwascentral/__init__.py +9 -0
pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
pyobo/sources/hgnc/__init__.py +9 -0
pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
pyobo/sources/icd/__init__.py +9 -0
pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
pyobo/sources/icd/icd11.py +148 -0
pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
pyobo/sources/interpro.py +4 -9
pyobo/sources/itis.py +0 -5
pyobo/sources/kegg/__init__.py +0 -0
pyobo/sources/kegg/api.py +16 -38
pyobo/sources/kegg/genes.py +9 -20
pyobo/sources/kegg/genome.py +1 -7
pyobo/sources/kegg/pathway.py +9 -21
pyobo/sources/mesh.py +58 -24
pyobo/sources/mgi.py +3 -10
pyobo/sources/mirbase/__init__.py +11 -0
pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
pyobo/sources/msigdb.py +74 -39
pyobo/sources/ncbi/__init__.py +9 -0
pyobo/sources/ncbi/ncbi_gc.py +162 -0
pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
pyobo/sources/nih_reporter.py +60 -0
pyobo/sources/nlm/__init__.py +9 -0
pyobo/sources/nlm/nlm_catalog.py +48 -0
pyobo/sources/nlm/nlm_publisher.py +36 -0
pyobo/sources/nlm/utils.py +116 -0
pyobo/sources/npass.py +6 -8
pyobo/sources/omim_ps.py +10 -3
pyobo/sources/pathbank.py +4 -8
pyobo/sources/pfam/__init__.py +9 -0
pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
pyobo/sources/pharmgkb/__init__.py +15 -0
pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
pyobo/sources/pharmgkb/utils.py +86 -0
pyobo/sources/pid.py +1 -6
pyobo/sources/pombase.py +6 -10
pyobo/sources/pubchem.py +4 -9
pyobo/sources/reactome.py +5 -11
pyobo/sources/rgd.py +11 -16
pyobo/sources/rhea.py +37 -36
pyobo/sources/ror.py +69 -42
pyobo/sources/selventa/__init__.py +0 -0
pyobo/sources/selventa/schem.py +4 -7
pyobo/sources/selventa/scomp.py +1 -6
pyobo/sources/selventa/sdis.py +4 -7
pyobo/sources/selventa/sfam.py +1 -6
pyobo/sources/sgd.py +6 -11
pyobo/sources/signor/__init__.py +7 -0
pyobo/sources/signor/download.py +41 -0
pyobo/sources/signor/signor_complexes.py +105 -0
pyobo/sources/slm.py +12 -15
pyobo/sources/umls/__init__.py +7 -1
pyobo/sources/umls/__main__.py +0 -0
pyobo/sources/umls/get_synonym_types.py +20 -4
pyobo/sources/umls/sty.py +57 -0
pyobo/sources/umls/synonym_types.tsv +1 -1
pyobo/sources/umls/umls.py +18 -22
pyobo/sources/unimod.py +46 -0
pyobo/sources/uniprot/__init__.py +1 -1
pyobo/sources/uniprot/uniprot.py +40 -32
pyobo/sources/uniprot/uniprot_ptm.py +4 -34
pyobo/sources/utils.py +3 -2
pyobo/sources/wikipathways.py +7 -10
pyobo/sources/zfin.py +5 -10
pyobo/ssg/__init__.py +12 -16
pyobo/ssg/base.html +0 -0
pyobo/ssg/index.html +26 -13
pyobo/ssg/term.html +12 -2
pyobo/ssg/typedef.html +0 -0
pyobo/struct/__init__.py +54 -8
pyobo/struct/functional/__init__.py +1 -0
pyobo/struct/functional/dsl.py +2572 -0
pyobo/struct/functional/macros.py +423 -0
pyobo/struct/functional/obo_to_functional.py +385 -0
pyobo/struct/functional/ontology.py +270 -0
pyobo/struct/functional/utils.py +112 -0
pyobo/struct/reference.py +331 -136
pyobo/struct/struct.py +1413 -643
pyobo/struct/struct_utils.py +1078 -0
pyobo/struct/typedef.py +162 -210
pyobo/struct/utils.py +12 -5
pyobo/struct/vocabulary.py +138 -0
pyobo/utils/__init__.py +0 -0
pyobo/utils/cache.py +13 -11
pyobo/utils/io.py +17 -31
pyobo/utils/iter.py +5 -5
pyobo/utils/misc.py +41 -53
pyobo/utils/ndex_utils.py +0 -0
pyobo/utils/path.py +76 -70
pyobo/version.py +3 -3
{pyobo-0.11.1.dist-info → pyobo-0.12.0.dist-info}/METADATA +224 -225
pyobo-0.12.0.dist-info/RECORD +202 -0
pyobo-0.12.0.dist-info/WHEEL +4 -0
{pyobo-0.11.1.dist-info → pyobo-0.12.0.dist-info}/entry_points.txt +1 -0
{pyobo-0.11.1.dist-info → pyobo-0.12.0.dist-info/licenses}/LICENSE +0 -0
pyobo/apps/__init__.py +0 -3
pyobo/apps/cli.py +0 -24
pyobo/apps/gilda/__init__.py +0 -3
pyobo/apps/gilda/__main__.py +0 -8
pyobo/apps/gilda/app.py +0 -48
pyobo/apps/gilda/cli.py +0 -36
pyobo/apps/gilda/templates/base.html +0 -33
pyobo/apps/gilda/templates/home.html +0 -11
pyobo/apps/gilda/templates/matches.html +0 -32
pyobo/apps/mapper/__init__.py +0 -3
pyobo/apps/mapper/__main__.py +0 -11
pyobo/apps/mapper/cli.py +0 -37
pyobo/apps/mapper/mapper.py +0 -187
pyobo/apps/mapper/templates/base.html +0 -35
pyobo/apps/mapper/templates/mapper_home.html +0 -64
pyobo/aws.py +0 -162
pyobo/cli/aws.py +0 -47
pyobo/identifier_utils.py +0 -142
pyobo/normalizer.py +0 -232
pyobo/registries/__init__.py +0 -16
pyobo/registries/metaregistry.json +0 -507
pyobo/registries/metaregistry.py +0 -135
pyobo/sources/icd11.py +0 -105
pyobo/xrefdb/__init__.py +0 -1
pyobo/xrefdb/canonicalizer.py +0 -214
pyobo/xrefdb/priority.py +0 -59
pyobo/xrefdb/sources/__init__.py +0 -60
pyobo/xrefdb/sources/biomappings.py +0 -36
pyobo/xrefdb/sources/cbms2019.py +0 -91
pyobo/xrefdb/sources/chembl.py +0 -83
pyobo/xrefdb/sources/compath.py +0 -82
pyobo/xrefdb/sources/famplex.py +0 -64
pyobo/xrefdb/sources/gilda.py +0 -50
pyobo/xrefdb/sources/intact.py +0 -113
pyobo/xrefdb/sources/ncit.py +0 -133
pyobo/xrefdb/sources/pubchem.py +0 -27
pyobo/xrefdb/sources/wikidata.py +0 -116
pyobo-0.11.1.dist-info/RECORD +0 -173
pyobo-0.11.1.dist-info/WHEEL +0 -5
pyobo-0.11.1.dist-info/top_level.txt +0 -1

pyobo/sources/{hgnc.py → hgnc/hgnc.py} RENAMED Viewed

@@ -6,8 +6,6 @@ import logging
 import typing
 from collections import Counter, defaultdict
 from collections.abc import Iterable
-from operator import attrgetter
-from typing import Optional
 from tabulate import tabulate
 from tqdm.auto import tqdm
@@ -17,11 +15,13 @@ from pyobo.resources.so import get_so_name
 from pyobo.struct import (
     Obo,
     Reference,
-    Synonym,
     SynonymTypeDef,
     Term,
+    TypeDef,
+    default_reference,
     from_species,
     gene_product_member_of,
+    has_citation,
     has_gene_product,
     member_of,
     orthologous,
@@ -42,10 +42,27 @@ DEFINITIONS_URL_FMT = (
     "hgnc_complete_set_{version}.json"
 )
-previous_symbol_type = SynonymTypeDef.from_text("previous_symbol")
-alias_symbol_type = SynonymTypeDef.from_text("alias_symbol")
-previous_name_type = SynonymTypeDef.from_text("previous_name")
-alias_name_type = SynonymTypeDef.from_text("alias_name")
+previous_symbol_type = SynonymTypeDef(
+    reference=default_reference(PREFIX, "previous_symbol", name="previous symbol")
+)
+alias_symbol_type = SynonymTypeDef(
+    reference=default_reference(PREFIX, "alias_symbol", name="alias symbol")
+)
+previous_name_type = SynonymTypeDef(
+    reference=default_reference(PREFIX, "previous_name", name="previous name")
+)
+alias_name_type = SynonymTypeDef(
+    reference=default_reference(PREFIX, "alias_name", name="alias name")
+)
+HAS_LOCUS_TYPE = TypeDef(
+    reference=default_reference(PREFIX, "locus_type", name="has locus type"), is_metadata_tag=True
+)
+HAS_LOCUS_GROUP = TypeDef(
+    reference=default_reference(PREFIX, "locus_group", name="has locus group"), is_metadata_tag=True
+)
+HAS_LOCATION = TypeDef(
+    reference=default_reference(PREFIX, "location", name="has location"), is_metadata_tag=True
+)
 #: First column is MIRIAM prefix, second column is HGNC key
 gene_xrefs = [
@@ -129,6 +146,7 @@ SKIP_KEYS = {
     "cd",  # symbol
     "homeodb",  # TODO add to bioregistry, though this is defunct
     "mamit-trnadb",  # TODO add to bioregistry, though this is defunct
+    "mane_select",  # TODO
 }
 #: A mapping from HGNC's locus_type annotations to sequence ontology identifiers
@@ -167,38 +185,8 @@ LOCUS_TYPE_TO_SO = {
     None: "0000704",  # gene
 }
-IDSPACES = {
-    prefix: f"https://bioregistry.io/{prefix}:"
-    for prefix in {
-        "rgd",
-        "mgi",
-        "eccode",
-        "rnacentral",
-        "pubmed",
-        "uniprot",
-        "mirbase",
-        "snornabase",
-        "hgnc",
-        "hgnc.genegroup",
-        "debio",
-        "ensembl",
-        "NCBIGene",
-        "vega",
-        "ucsc",
-        "ena",
-        "ccds",
-        "omim",
-        "cosmic",
-        "merops",
-        "orphanet",
-        "pseudogene",
-        "lncipedia",
-        "refseq",
-    }
-}
-IDSPACES.update(
-    NCBITaxon="http://purl.obolibrary.org/obo/NCBITaxon_",
-    SO="http://purl.obolibrary.org/obo/SO_",
+PUBLICATION_TERM = Term(
+    reference=Reference(prefix="IAO", identifier="0000013", name="journal article")
 )
@@ -214,8 +202,11 @@ class HGNCGetter(Obo):
         orthologous,
         member_of,
         exact_match,
+        has_citation,
+        HAS_LOCUS_GROUP,
+        HAS_LOCUS_TYPE,
+        HAS_LOCATION,
     ]
-    idspaces = IDSPACES
     synonym_typedefs = [
         previous_name_type,
         previous_symbol_type,
@@ -233,12 +224,7 @@ class HGNCGetter(Obo):
         return get_terms(force=force, version=self.data_version)
-def get_obo(*, force: bool = False) -> Obo:
-    """Get HGNC as OBO."""
-    return HGNCGetter(force=force)
-def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Term]:
+def get_terms(version: str | None = None, force: bool = False) -> Iterable[Term]:
     """Get HGNC terms."""
     if version is None:
         version = get_version("hgnc")
@@ -251,18 +237,15 @@ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Te
         version=version,
         name="hgnc_complete_set.json",
     )
-    with open(path) as file:
+    with path.open() as file:
         entries = json.load(file)["response"]["docs"]
     yield Term.from_triple("NCBITaxon", "9606", "Homo sapiens")
-    yield from sorted(
-        {
-            Term(reference=Reference(prefix="SO", identifier=so_id, name=get_so_name(so_id)))
-            for so_id in sorted(LOCUS_TYPE_TO_SO.values())
-            if so_id
-        },
-        key=attrgetter("identifier"),
-    )
+    _so_ids: set[str] = {s for s in LOCUS_TYPE_TO_SO.values() if s}
+    yield from [
+        Term(reference=Reference(prefix="SO", identifier=so_id, name=get_so_name(so_id)))
+        for so_id in sorted(_so_ids)
+    ]
     statuses = set()
     for entry in tqdm(entries, desc=f"Mapping {PREFIX}", unit="gene", unit_scale=True):
@@ -273,7 +256,7 @@ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Te
         )
         status = entry.pop("status")
         if status == "Approved":
-            is_obsolete = False
+            is_obsolete = None
         elif status not in statuses:
             statuses.add(status)
             tqdm.write(f"[{PREFIX}] unhandled {status}")
@@ -297,7 +280,7 @@ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Te
                 continue  # only add concrete annotations
             term.append_relationship(
                 gene_product_member_of,
-                Reference(prefix="eccode", identifier=ec_code),
+                Reference(prefix="ec", identifier=ec_code),
             )
         for rna_central_ids in entry.pop("rna_central_id", []):
             for rna_central_id in rna_central_ids.split(","):
@@ -364,7 +347,7 @@ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Te
             xref_identifiers = entry.pop(key, None)
             if xref_identifiers is None:
                 continue
-            if isinstance(xref_identifiers, (str, int)):
+            if isinstance(xref_identifiers, str | int):
                 xref_identifiers = [str(xref_identifiers)]
             if xref_prefix == "merops.entry":
@@ -389,7 +372,7 @@ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Te
         gene_group_ids = entry.pop("gene_group_id", [])
         gene_groups = entry.pop("gene_group", [])
-        for gene_group_id, gene_group_label in zip(gene_group_ids, gene_groups):
+        for gene_group_id, gene_group_label in zip(gene_group_ids, gene_groups, strict=False):
             term.append_relationship(
                 member_of,
                 Reference(
@@ -400,20 +383,20 @@ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Te
             )
         for alias_symbol in entry.pop("alias_symbol", []):
-            term.append_synonym(Synonym(name=alias_symbol, type=alias_symbol_type))
+            term.append_synonym(alias_symbol, type=alias_symbol_type)
         for alias_name in entry.pop("alias_name", []):
-            term.append_synonym(Synonym(name=alias_name, type=alias_name_type))
+            term.append_synonym(alias_name, type=alias_name_type)
         for previous_symbol in itt.chain(
             entry.pop("previous_symbol", []), entry.pop("prev_symbol", [])
         ):
-            term.append_synonym(Synonym(name=previous_symbol, type=previous_symbol_type))
+            term.append_synonym(previous_symbol, type=previous_symbol_type)
         for previous_name in entry.pop("prev_name", []):
-            term.append_synonym(Synonym(name=previous_name, type=previous_name_type))
+            term.append_synonym(previous_name, type=previous_name_type)
-        for prop in ["location"]:
+        for prop, td in [("location", HAS_LOCATION)]:
             value = entry.pop(prop, None)
             if value:
-                term.append_property(prop, value)
+                term.annotate_string(td, value)
         locus_type = entry.pop("locus_type")
         locus_group = entry.pop("locus_group")
@@ -425,8 +408,8 @@ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Te
                 Reference(prefix="SO", identifier="0000704", name=get_so_name("0000704"))
             )  # gene
             unhandle_locus_types[locus_type][identifier] = term
-            term.append_property("locus_type", locus_type)
-            term.append_property("locus_group", locus_group)
+            term.annotate_string(HAS_LOCUS_TYPE, locus_type)
+            term.annotate_string(HAS_LOCUS_GROUP, locus_group)
         term.set_species(identifier="9606", name="Homo sapiens")
@@ -453,9 +436,11 @@ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Te
                         hgnc_id,
                         term.name,
                         term.is_obsolete,
-                        term.bioregistry_link,
+                        f"https://bioregistry.io/{term.curie}",
                         ", ".join(
-                            p.bioregistry_link for p in term.provenance if p.bioregistry_link
+                            f"https://bioregistry.io/{p.curie}"
+                            for p in term.provenance
+                            if isinstance(p, Reference)
                         ),
                     )
                     for hgnc_id, term in sorted(v.items())
@@ -472,7 +457,8 @@ def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Te
     logger.warning(
         "Unhandled locus types:\n%s", tabulate(unhandle_locus_type_counter.most_common())
     )
-    logger.warning("Unhandled keys:\n%s", tabulate(unhandled_entry_keys.most_common()))
+    if unhandled_entry_keys:
+        logger.warning("Unhandled keys:\n%s", tabulate(unhandled_entry_keys.most_common()))
 if __name__ == "__main__":

pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} RENAMED Viewed

@@ -5,16 +5,9 @@ from collections.abc import Iterable, Mapping
 import pandas as pd
-from ..struct import (
-    Obo,
-    Reference,
-    Synonym,
-    SynonymTypeDef,
-    Term,
-    enables,
-    from_species,
-)
-from ..utils.path import ensure_path
+from ...struct import Obo, Reference, SynonymTypeDef, Term, has_citation
+from ...struct.typedef import enables, exact_match, from_species
+from ...utils.path import ensure_path
 __all__ = [
     "HGNCGroupGetter",
@@ -36,18 +29,13 @@ class HGNCGroupGetter(Obo):
     ontology = PREFIX
     bioversions_key = "hgnc"
     synonym_typedefs = [symbol_type]
-    typedefs = [from_species, enables]
+    typedefs = [from_species, enables, exact_match, has_citation]
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
         """Iterate over terms in the ontology."""
         return get_terms(force=force)
-def get_obo(force: bool = False) -> Obo:
-    """Get HGNC Gene Groups as OBO."""
-    return HGNCGroupGetter(force=force)
 def get_hierarchy(force: bool = False) -> Mapping[str, list[str]]:
     """Get the HGNC Gene Families hierarchy as a dictionary."""
     path = ensure_path(PREFIX, url=HIERARCHY_URL, force=force)
@@ -99,12 +87,14 @@ def _get_terms_helper(force: bool = False) -> Iterable[Term]:
         )
         if pubmed_ids and pd.notna(pubmed_ids):
             for s in pubmed_ids.replace(" ", ",").split(","):
-                term.append_provenance(Reference(prefix="pubmed", identifier=s.strip()))
+                s = s.strip()
+                if s:
+                    term.append_provenance(Reference(prefix="pubmed", identifier=s))
         if desc_go and pd.notna(desc_go):
             go_id = desc_go[len("http://purl.uniprot.org/go/") :]
             term.append_relationship(enables, Reference(prefix="GO", identifier=go_id))
         if symbol and pd.notna(symbol):
-            term.append_synonym(Synonym(name=symbol, type=symbol_type))
+            term.append_synonym(symbol, type=symbol_type)
         term.set_species(identifier="9606", name="Homo sapiens")
         yield term

pyobo/sources/icd/__init__.py ADDED Viewed

@@ -0,0 +1,9 @@
+"""Resources from ICD."""
+from .icd10 import ICD10Getter
+from .icd11 import ICD11Getter
+__all__ = [
+    "ICD10Getter",
+    "ICD11Getter",
+]

pyobo/sources/{icd10.py → icd/icd10.py} RENAMED Viewed

@@ -1,24 +1,27 @@
 """Convert ICD-10 to OBO.
-Run with python -m pyobo.sources.icd10 -v
+Run with ``python -m pyobo.sources.icd10 -v``.
+.. note::
+    If web requests are stalling, try deleting the ``~/.cachier`` directory.
 """
 import logging
 from collections.abc import Iterable, Mapping
+from pathlib import Path
 from typing import Any
-import click
-from more_click import verbose_option
 from tqdm.auto import tqdm
-from ..sources.icd_utils import (
+from .icd_utils import (
     ICD10_TOP_LEVEL_URL,
     get_child_identifiers,
-    get_icd,
+    get_icd_10_top,
     visiter,
 )
-from ..struct import Obo, Reference, Synonym, Term
-from ..utils.path import prefix_directory_join
+from ...struct import Obo, Reference, Synonym, Term, has_category
+from ...utils.path import prefix_directory_join
 __all__ = [
     "ICD10Getter",
@@ -34,37 +37,39 @@ class ICD10Getter(Obo):
     """An ontology representation of ICD-10."""
     ontology = PREFIX
-    dynamic_version = True
+    static_version = VERSION
+    typedefs = [has_category]
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
         """Iterate over terms in the ontology."""
-        return iter_terms()
+        return iter_terms(self._version_or_raise)
-def get_obo() -> Obo:
-    """Get ICD-10 as OBO."""
-    return ICD10Getter()
+def _get_chapters(version: str, path: Path):
+    res_json = get_icd_10_top(version=version, path=path)
+    chapter_urls = res_json["child"]
+    tqdm.write(f"there are {len(chapter_urls)} chapters")
+    identifiers = get_child_identifiers(ICD10_TOP_LEVEL_URL, res_json)
+    return identifiers
-def iter_terms() -> Iterable[Term]:
+def iter_terms(version: str) -> Iterable[Term]:
     """Iterate over ICD-10 terms."""
-    r = get_icd(ICD10_TOP_LEVEL_URL)
-    res_json = r.json()
-    directory = prefix_directory_join(PREFIX, version=VERSION)
-    chapter_urls = res_json["child"]
-    tqdm.write(f"there are {len(chapter_urls)} chapters")
+    directory = prefix_directory_join(PREFIX, version=version)
+    identifiers = _get_chapters(version=version, path=directory.joinpath("top.json"))
     visited_identifiers: set[str] = set()
-    for identifier in get_child_identifiers(ICD10_TOP_LEVEL_URL, res_json):
-        yield from visiter(
-            identifier,
-            visited_identifiers,
-            directory,
-            endpoint=ICD10_TOP_LEVEL_URL,
-            converter=_extract_icd10,
-        )
+    with tqdm(desc=f"[{PREFIX}]") as pbar:
+        for identifier in identifiers:
+            for term in visiter(
+                identifier,
+                visited_identifiers,
+                directory,
+                endpoint=ICD10_TOP_LEVEL_URL,
+                converter=_extract_icd10,
+            ):
+                pbar.update(1)
+                yield term
 def _extract_icd10(res_json: Mapping[str, Any]) -> Term:
@@ -81,17 +86,10 @@ def _extract_icd10(res_json: Mapping[str, Any]) -> Term:
         synonyms=synonyms,
         parents=parents,
     )
-    rv.append_property("class_kind", res_json["classKind"])
+    rv.annotate_string(has_category, res_json["classKind"])
     return rv
-@click.command()
-@verbose_option
-def _main():
-    get_obo().write_default(use_tqdm=True)
 if __name__ == "__main__":
-    _main()
+    ICD10Getter.cli()

pyobo/sources/icd/icd11.py ADDED Viewed

@@ -0,0 +1,148 @@
+"""Convert ICD11 to OBO.
+Run with ``python -m pyobo.sources.icd11 -v``.
+.. note::
+    If web requests are stalling, try deleting the ``~/.cachier`` directory.
+"""
+import json
+import logging
+from collections.abc import Iterable, Mapping
+from typing import Any
+from tqdm.auto import tqdm
+from .icd_utils import (
+    ICD11_TOP_LEVEL_URL,
+    ICDError,
+    get_child_identifiers,
+    get_icd,
+    get_icd_11_mms,
+    visiter,
+)
+from ...struct import Obo, Reference, Synonym, Term, TypeDef, default_reference
+from ...utils.path import prefix_directory_join
+__all__ = [
+    "ICD11Getter",
+]
+logger = logging.getLogger(__name__)
+PREFIX = "icd11"
+CODE_PREFIX = "icd11.code"
+CODE_PROP = TypeDef(reference=default_reference(PREFIX, "icd_mms_code"), is_metadata_tag=True)
+class ICD11Getter(Obo):
+    """An ontology representation of ICD-11."""
+    ontology = PREFIX
+    typedefs = [CODE_PROP]
+    dynamic_version = True
+    def iter_terms(self, force: bool = False) -> Iterable[Term]:
+        """Iterate over terms in the ontology."""
+        return iterate_icd11()
+def iterate_icd11(version: str | None = None) -> Iterable[Term]:
+    """Iterate over the terms in ICD11 and enrich them with MMS."""
+    # Get all terms from the ICD foundation API
+    version_strict, terms = _get_icd11_terms_helper(version=version)
+    # prepare a directory for enriching from MMS
+    mms_directory = prefix_directory_join(PREFIX, "mms", version=version_strict)
+    # this takes a bit more than 2 hours
+    for term in tqdm(terms, desc="Getting MMS", unit_scale=True):
+        path = mms_directory.joinpath(term.identifier).with_suffix(".json")
+        if path.exists():
+            mms_data = json.loads(path.read_text())
+        else:
+            try:
+                mms_data = get_icd_11_mms(term.identifier)
+            except ICDError:
+                # writing this isn't necessary since not all terms have MMS entries
+                # tqdm.write(str(e))
+                mms_data = {}
+            path.write_text(json.dumps(mms_data))
+        if code := mms_data.get("code"):
+            term.append_exact_match(Reference(prefix=CODE_PREFIX, identifier=code))
+        yield term
+def _get_icd11_terms_helper(version: str | None = None) -> tuple[str, list[Term]]:
+    """Iterate over the terms in ICD11.
+    The API doesn't seem to have a rate limit, but returns pretty slow. This means that
+    it only gets results at at about 5 calls/second. Get ready to be patient - the API
+    token expires every hour so there's a caching mechanism with :mod:`cachier` that
+    gets a new one every hour.
+    """
+    if version is not None:
+        directory = prefix_directory_join(PREFIX, "base", version=version)
+        top_path = directory.joinpath("top.json")
+        if top_path.is_file():
+            res_json = json.loads(top_path.read_text())
+        else:
+            res_json = get_icd(ICD11_TOP_LEVEL_URL).json()
+            top_path.write_text(json.dumps(res_json, indent=2))
+    else:
+        tqdm.write("No version passed, looking up version from ICD11")
+        res_json = get_icd(ICD11_TOP_LEVEL_URL).json()
+        version = res_json["releaseId"]
+        directory = prefix_directory_join(PREFIX, "base", version=version)
+        top_path = directory.joinpath("top.json")
+        with top_path.open("w") as file:
+            json.dump(res_json, file, indent=2)
+    tqdm.write(f"There are {len(res_json['child'])} top level entities")
+    visited_identifiers: set[str] = set()
+    rv: list[Term] = []
+    for identifier in get_child_identifiers(ICD11_TOP_LEVEL_URL, res_json):
+        rv.extend(
+            visiter(
+                identifier,
+                visited_identifiers,
+                directory,
+                endpoint=ICD11_TOP_LEVEL_URL,
+                converter=_extract_icd11,
+            )
+        )
+    return version, rv
+def _extract_icd11(res_json: Mapping[str, Any]) -> Term:
+    identifier = res_json["@id"][len(ICD11_TOP_LEVEL_URL) :].lstrip("/")
+    if "definition" in res_json:
+        definition = res_json["definition"]["@value"]
+        definition = definition.strip().replace("\r\n", " ")
+        definition = definition.strip().replace("\\n", " ")
+        definition = definition.strip().replace("\n", " ")
+    else:
+        definition = None
+    name = res_json["title"]["@value"]
+    synonyms = [Synonym(synonym["label"]["@value"]) for synonym in res_json.get("synonym", [])]
+    parents = [
+        Reference(prefix=PREFIX, identifier=url[len("http://id.who.int/icd/entity/") :])
+        for url in res_json["parent"]
+        if url[len("http://id.who.int/icd/entity/") :]
+    ]
+    return Term(
+        reference=Reference(prefix=PREFIX, identifier=identifier, name=name),
+        definition=definition,
+        synonyms=synonyms,
+        parents=parents,
+    )
+if __name__ == "__main__":
+    ICD11Getter.cli()

pyobo 0.11.1__py3-none-any.whl → 0.12.0__py3-none-any.whl

pyobo 0.11.1__py3-none-any.whl → 0.12.0__py3-none-any.whl