PyPI - pyobo - Versions diffs - 0.12.10__py3-none-any.whl → 0.12.12__py3-none-any.whl - Mend

pyobo 0.12.10__py3-none-any.whl → 0.12.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46)

pyobo/__init__.py +6 -0
pyobo/api/__init__.py +11 -1
pyobo/api/alts.py +18 -4
pyobo/api/embedding.py +108 -9
pyobo/api/names.py +28 -6
pyobo/api/xrefs.py +21 -1
pyobo/cli/cli.py +9 -3
pyobo/cli/database.py +63 -22
pyobo/cli/lookup.py +39 -24
pyobo/cli/utils.py +6 -2
pyobo/constants.py +66 -7
pyobo/getters.py +8 -3
pyobo/ner/api.py +17 -10
pyobo/ner/scispacy_utils.py +2 -0
pyobo/plugins.py +3 -1
pyobo/sources/__init__.py +2 -0
pyobo/sources/antibodyregistry.py +3 -3
pyobo/sources/bigg/bigg_compartment.py +1 -1
pyobo/sources/complexportal.py +3 -3
pyobo/sources/conso.py +3 -3
pyobo/sources/famplex.py +3 -3
pyobo/sources/goldbook.py +86 -0
pyobo/sources/hgnc/hgnc.py +157 -96
pyobo/sources/hgnc/hgncgenefamily.py +14 -13
pyobo/sources/msigdb.py +3 -3
pyobo/sources/omim_ps.py +8 -2
pyobo/sources/reactome.py +3 -3
pyobo/sources/rgd.py +7 -11
pyobo/sources/slm.py +3 -3
pyobo/sources/uniprot/uniprot.py +3 -3
pyobo/sources/wikipathways.py +7 -2
pyobo/struct/__init__.py +2 -2
pyobo/struct/functional/macros.py +1 -1
pyobo/struct/functional/obo_to_functional.py +7 -3
pyobo/struct/obo/reader.py +4 -4
pyobo/struct/struct.py +48 -18
pyobo/struct/struct_utils.py +19 -5
pyobo/struct/typedef.py +19 -3
pyobo/struct/vocabulary.py +6 -3
pyobo/utils/path.py +5 -4
pyobo/version.py +1 -1
{pyobo-0.12.10.dist-info → pyobo-0.12.12.dist-info}/METADATA +45 -23
{pyobo-0.12.10.dist-info → pyobo-0.12.12.dist-info}/RECORD +46 -45
{pyobo-0.12.10.dist-info → pyobo-0.12.12.dist-info}/WHEEL +1 -1
{pyobo-0.12.10.dist-info → pyobo-0.12.12.dist-info}/entry_points.txt +0 -0
{pyobo-0.12.10.dist-info → pyobo-0.12.12.dist-info}/licenses/LICENSE +0 -0

pyobo/ner/api.py CHANGED Viewed

@@ -2,6 +2,7 @@
 from __future__ import annotations
+import logging
 from collections.abc import Iterable
 from subprocess import CalledProcessError
 from typing import TYPE_CHECKING
@@ -22,6 +23,8 @@ __all__ = [
     "get_grounder",
 ]
+logger = logging.getLogger(__name__)
 def get_grounder(
     prefixes: str | Iterable[str],
@@ -29,25 +32,29 @@ def get_grounder(
     grounder_cls: type[gilda.Grounder] | None = None,
     versions: None | str | Iterable[str | None] | dict[str, str] = None,
     skip_obsolete: bool = False,
+    raise_on_missing: bool = False,
     **kwargs: Unpack[GetOntologyKwargs],
 ) -> ssslm.Grounder:
     """Get a grounder for the given prefix(es)."""
-    literal_mappings: list[LiteralMapping] = []
+    all_literal_mappings: list[LiteralMapping] = []
     it = _clean_prefix_versions(prefixes, versions=versions)
     disable = len(it) == 1 or not check_should_use_tqdm(kwargs)
     for prefix, kwargs["version"] in tqdm(it, leave=False, disable=disable):
         try:
-            literal_mappings.extend(
-                get_literal_mappings(
-                    prefix,
-                    skip_obsolete=skip_obsolete,
-                    **kwargs,
-                )
-            )
-        except (NoBuildError, CalledProcessError):
+            literal_mappings = get_literal_mappings(prefix, skip_obsolete=skip_obsolete, **kwargs)
+        except (NoBuildError, CalledProcessError) as e:
+            logger.warning("[%s] unable to get literal mappings: %s", prefix, e)
             continue
+        else:
+            if not literal_mappings:
+                if raise_on_missing:
+                    raise ValueError(f"no literal mappings were loaded for {prefix}")
+                logger.warning("[%s] no literal mappings loaded", prefix)
+            all_literal_mappings.extend(literal_mappings)
-    return ssslm.make_grounder(literal_mappings, implementation="gilda", grounder_cls=grounder_cls)
+    return ssslm.make_grounder(
+        all_literal_mappings, implementation="gilda", grounder_cls=grounder_cls
+    )
 def _clean_prefix_versions(

pyobo/ner/scispacy_utils.py CHANGED Viewed

@@ -227,6 +227,8 @@ def get_scispacy_entities(prefix: str, **kwargs: Unpack[GetOntologyKwargs]) -> I
     # TODO reuse labels, synonyms, and definitions cache
     ontology = get_ontology(prefix, **kwargs)
     for term in ontology:
+        if not term.name or term.prefix != ontology.ontology:
+            continue
         yield Entity(
             concept_id=term.curie,
             canonical_name=term.name,

pyobo/plugins.py CHANGED Viewed

@@ -4,8 +4,10 @@ from __future__ import annotations
 from collections.abc import Callable, Iterable, Mapping
 from functools import lru_cache
+from typing import TYPE_CHECKING
-from .struct import Obo
+if TYPE_CHECKING:
+    from .struct import Obo
 __all__ = [
     "has_nomenclature_plugin",

pyobo/sources/__init__.py CHANGED Viewed

@@ -29,6 +29,7 @@ from .famplex import FamPlexGetter
 from .flybase import FlyBaseGetter
 from .gard import GARDGetter
 from .geonames import GeonamesFeatureGetter, GeonamesGetter
+from .goldbook import GoldBookGetter
 from .gtdb import GTDBGetter
 from .gwascentral import GWASCentralPhenotypeGetter, GWASCentralStudyGetter
 from .hgnc import HGNCGetter, HGNCGroupGetter
@@ -110,6 +111,7 @@ __all__ = [
     "GWASCentralStudyGetter",
     "GeonamesFeatureGetter",
     "GeonamesGetter",
+    "GoldBookGetter",
     "HGNCGetter",
     "HGNCGroupGetter",
     "IANAGetter",

pyobo/sources/antibodyregistry.py CHANGED Viewed

@@ -12,7 +12,7 @@ from tqdm.auto import tqdm
 from pyobo import Obo, Reference, Term
 from pyobo.api.utils import get_version
-from pyobo.struct.typedef import has_citation
+from pyobo.struct.typedef import is_mentioned_by
 from pyobo.utils.path import ensure_df
 __all__ = [
@@ -47,7 +47,7 @@ class AntibodyRegistryGetter(Obo):
     """An ontology representation of the Antibody Registry."""
     ontology = bioversions_key = PREFIX
-    typedefs = [has_citation]
+    typedefs = [is_mentioned_by]
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
         """Iterate over terms in the ontology."""
@@ -97,7 +97,7 @@ def iter_terms(*, force: bool = False, version: str | None = None) -> Iterable[T
                     pubmed_id = pubmed_id.strip()
                     if not pubmed_id:
                         continue
-                    term.append_provenance(Reference(prefix="pubmed", identifier=pubmed_id))
+                    term.append_mentioned_by(Reference(prefix="pubmed", identifier=pubmed_id))
             yield term

pyobo/sources/bigg/bigg_compartment.py CHANGED Viewed

@@ -55,7 +55,7 @@ def get_compartments(*, force: bool = False, version: str | None = None) -> dict
     """Get a dictionary of BiGG compartments."""
     rv = {}
     soup = get_soup(DATA_URL)
-    table = soup.find(**{"class": "myTable"})  # type:ignore[arg-type]
+    table = soup.find(class_="myTable")
     if table is None:
         raise ValueError
     for row in table.find_all("tr"):  # type:ignore[attr-defined]

pyobo/sources/complexportal.py CHANGED Viewed

@@ -14,8 +14,8 @@ from pyobo.struct import (
     Term,
     _parse_str_or_curie_or_uri,
     from_species,
-    has_citation,
     has_part,
+    is_mentioned_by,
 )
 from pyobo.utils.path import ensure_df
@@ -157,7 +157,7 @@ class ComplexPortalGetter(Obo):
     """An ontology representation of the Complex Portal."""
     bioversions_key = ontology = PREFIX
-    typedefs = [from_species, has_part, has_citation]
+    typedefs = [from_species, has_part, is_mentioned_by]
     root_terms = [ROOT]
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
@@ -240,7 +240,7 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]:
             if note == "identity":
                 term.append_xref(reference)
             elif note == "see-also" and reference.prefix == "pubmed":
-                term.append_provenance(reference)
+                term.append_mentioned_by(reference)
             elif (note, reference.prefix) not in unhandled_xref_type:
                 logger.debug(f"unhandled xref type: {note} / {reference.prefix}")
                 unhandled_xref_type.add((note, reference.prefix))

pyobo/sources/conso.py CHANGED Viewed

@@ -4,7 +4,7 @@ from collections.abc import Iterable
 import pandas as pd
-from ..struct import Obo, Reference, Synonym, Term, _parse_str_or_curie_or_uri, has_citation
+from ..struct import Obo, Reference, Synonym, Term, _parse_str_or_curie_or_uri, is_mentioned_by
 from ..utils.io import multidict
 from ..utils.path import ensure_df
@@ -25,7 +25,7 @@ class CONSOGetter(Obo):
     ontology = PREFIX
     dynamic_version = True
-    typedefs = [has_citation]
+    typedefs = [is_mentioned_by]
     def iter_terms(self, force: bool = False) -> Iterable[Term]:
         """Iterate over terms in the ontology."""
@@ -71,7 +71,7 @@ def iter_terms() -> Iterable[Term]:
                 continue
             reference = _parse_str_or_curie_or_uri(curie)
             if reference is not None:
-                term.append_provenance(reference)
+                term.append_mentioned_by(reference)
         yield term

pyobo/sources/famplex.py CHANGED Viewed

@@ -9,7 +9,7 @@ from pystow.utils import get_commit
 from pyobo import get_name_id_mapping
 from pyobo.struct import Obo, Reference, Term, _parse_str_or_curie_or_uri
-from pyobo.struct.typedef import has_citation, has_member, has_part, is_a, part_of
+from pyobo.struct.typedef import has_member, has_part, is_a, is_mentioned_by, part_of
 from pyobo.utils.io import multidict
 from pyobo.utils.path import ensure_df
@@ -23,7 +23,7 @@ class FamPlexGetter(Obo):
     ontology = PREFIX
     dynamic_version = True
-    typedefs = [has_member, has_part, is_a, part_of, has_citation]
+    typedefs = [has_member, has_part, is_a, part_of, is_mentioned_by]
     def _get_version(self) -> str:
         return get_commit("sorgerlab", "famplex")
@@ -110,7 +110,7 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]:
             _parse_str_or_curie_or_uri(provenance) if isinstance(provenance, str) else None
         )
         if provenance_reference:
-            term.append_provenance(provenance_reference)
+            term.append_mentioned_by(provenance_reference)
         for xref_reference in id_xrefs.get(entity, []):
             term.append_xref(xref_reference)

pyobo/sources/goldbook.py ADDED Viewed

@@ -0,0 +1,86 @@
+"""An ontology representation of IUPAC Gold Book."""
+import json.decoder
+from collections.abc import Iterable
+import pystow.utils
+import requests
+from tqdm import tqdm
+from pyobo.struct import Obo, Reference, Term
+from pyobo.utils.path import ensure_path
+PREFIX = "goldbook"
+URL = "https://goldbook.iupac.org/terms/index/all/json/download"
+TERM_URL_FORMAT = "https://goldbook.iupac.org/terms/view/{}/json"
+class GoldBookGetter(Obo):
+    """An ontology representation of IUPAC Gold Book."""
+    ontology = PREFIX
+    dynamic_version = True
+    def iter_terms(self, force: bool = False) -> Iterable[Term]:
+        """Iterate over terms in the ontology."""
+        return _iter_terms()
+def _iter_terms() -> Iterable[Term]:
+    res = requests.get(URL, timeout=15).json()
+    for identifier in tqdm(res["terms"]["list"], unit_scale=True):
+        if term := _get_term(identifier):
+            yield term
+def _get_term(identifier: str) -> Term | None:
+    url = TERM_URL_FORMAT.format(identifier)
+    try:
+        path = ensure_path(PREFIX, "terms", url=url, name=f"{identifier}.json")
+    except pystow.utils.DownloadError:
+        tqdm.write(f"[{PREFIX}:{identifier}] failed to download {url}")
+        return None
+    try:
+        with path.open() as file:
+            res = json.load(file)
+    except json.decoder.JSONDecodeError:
+        tqdm.write(f"[{PREFIX}:{identifier}] failed to parse data in {path}")
+        return None
+    record = res["term"]
+    definitions = record["definitions"]
+    if definitions:
+        definition = _clean(definitions[0]["text"])
+    else:
+        definition = None
+    term = Term(
+        reference=Reference(
+            prefix=PREFIX,
+            identifier=identifier,
+            name=record["title"].strip(),
+        ),
+        definition=definition,
+    )
+    if synonym := record.get("synonym"):
+        if synonym.startswith("<"):
+            if synonym.startswith("<em>synonym</em>:"):
+                synonym = synonym.removeprefix("<em>synonym</em>:")
+                term.append_synonym(_clean(synonym))
+            elif synonym.startswith("<em>synonyms</em>:"):
+                for s in synonym.removeprefix("<em>synonyms</em>:").strip().split(","):
+                    term.append_synonym(_clean(s))
+            else:
+                tqdm.write(f"[{term.curie}] issue with synonym: {synonym}")
+    return term
+def _clean(s: str) -> str:
+    return s.strip().replace("\\n", "\n")
+if __name__ == "__main__":
+    GoldBookGetter.cli()

pyobo/sources/hgnc/hgnc.py CHANGED Viewed

@@ -7,6 +7,7 @@ import typing
 from collections import Counter, defaultdict
 from collections.abc import Iterable
+import obographs
 import pydantic
 from tabulate import tabulate
 from tqdm.auto import tqdm
@@ -14,22 +15,22 @@ from tqdm.auto import tqdm
 from pyobo.api.utils import get_version
 from pyobo.resources.so import get_so_name
 from pyobo.struct import (
+    Annotation,
     Obo,
+    OBOLiteral,
     Reference,
-    SynonymTypeDef,
     Term,
-    TypeDef,
-    default_reference,
     from_species,
     gene_product_member_of,
-    has_citation,
     has_gene_product,
+    is_mentioned_by,
     member_of,
     orthologous,
     transcribes_to,
 )
-from pyobo.struct.typedef import exact_match
-from pyobo.utils.path import ensure_path, prefix_directory_join
+from pyobo.struct.struct import gene_symbol_synonym, previous_gene_symbol, previous_name
+from pyobo.struct.typedef import comment, ends, exact_match, located_in, starts
+from pyobo.utils.path import ensure_path
 __all__ = [
     "HGNCGetter",
@@ -43,26 +44,8 @@ DEFINITIONS_URL_FMT = (
     "hgnc_complete_set_{version}.json"
 )
-previous_symbol_type = SynonymTypeDef(
-    reference=default_reference(PREFIX, "previous_symbol", name="previous symbol")
-)
-alias_symbol_type = SynonymTypeDef(
-    reference=default_reference(PREFIX, "alias_symbol", name="alias symbol")
-)
-previous_name_type = SynonymTypeDef(
-    reference=default_reference(PREFIX, "previous_name", name="previous name")
-)
-alias_name_type = SynonymTypeDef(
-    reference=default_reference(PREFIX, "alias_name", name="alias name")
-)
-HAS_LOCUS_TYPE = TypeDef(
-    reference=default_reference(PREFIX, "locus_type", name="has locus type"), is_metadata_tag=True
-)
-HAS_LOCUS_GROUP = TypeDef(
-    reference=default_reference(PREFIX, "locus_group", name="has locus group"), is_metadata_tag=True
-)
-HAS_LOCATION = TypeDef(
-    reference=default_reference(PREFIX, "location", name="has location"), is_metadata_tag=True
+CHR_URL = (
+    "https://raw.githubusercontent.com/monarch-initiative/monochrom/refs/heads/master/chr.json"
 )
 #: First column is MIRIAM prefix, second column is HGNC key
@@ -157,7 +140,7 @@ LOCUS_TYPE_TO_SO = {
     "complex locus constituent": "0000997",  # https://github.com/pyobo/pyobo/issues/118#issuecomment-1564520052
     # non-coding RNA
     "RNA, Y": "0002359",
-    "RNA, cluster": "",  # TODO see https://github.com/The-Sequence-Ontology/SO-Ontologies/issues/564
+    "RNA, cluster": "0003001",  # TODO see https://github.com/The-Sequence-Ontology/SO-Ontologies/issues/564
     "RNA, long non-coding": "0002127",  # HGNC links to wrong one
     "RNA, micro": "0001265",
     "RNA, misc": "0001266",
@@ -180,7 +163,7 @@ LOCUS_TYPE_TO_SO = {
     "fragile site": "0002349",
     "readthrough": "0000697",  # maybe not right
     "transposable element": "0000111",  # HGNC links to wrong one
-    "virus integration site": "",  # TODO see https://github.com/The-Sequence-Ontology/SO-Ontologies/issues/551
+    "virus integration site": "0003002",  # TODO see https://github.com/The-Sequence-Ontology/SO-Ontologies/issues/551
     "region": "0001411",  # a small bucket for things that need a better annotation, even higher than "gene"
     "unknown": "0000704",  # gene
     None: "0000704",  # gene
@@ -190,6 +173,14 @@ PUBLICATION_TERM = Term(
     reference=Reference(prefix="IAO", identifier="0000013", name="journal article")
 )
+#: Indicates the cytogenetic location of the gene or region on the chromosome.
+#: In the absence of that information one of the following may be listed.
+QUALIFIERS = {
+    " not on reference assembly": "not on reference assembly -named gene is not annotated on the current version of the Genome Reference Consortium human reference assembly; may have been annotated on previous assembly versions or on a non-reference human assembly",
+    " unplaced": "unplaced - named gene is annotated on an unplaced/unlocalized scaffold of the human reference assembly",
+    " alternate reference locus": "reserved - named gene has never been annotated on any human assembly",
+}
 class HGNCGetter(Obo):
     """An ontology representation of HGNC's gene nomenclature."""
@@ -203,16 +194,16 @@ class HGNCGetter(Obo):
         orthologous,
         member_of,
         exact_match,
-        has_citation,
-        HAS_LOCUS_GROUP,
-        HAS_LOCUS_TYPE,
-        HAS_LOCATION,
+        is_mentioned_by,
+        located_in,
+        starts,
+        ends,
+        comment,
     ]
     synonym_typedefs = [
-        previous_name_type,
-        previous_symbol_type,
-        alias_name_type,
-        alias_symbol_type,
+        previous_name,
+        previous_gene_symbol,
+        gene_symbol_synonym,
     ]
     root_terms = [
         Reference(prefix="SO", identifier=so_id, name=get_so_name(so_id))
@@ -225,12 +216,28 @@ class HGNCGetter(Obo):
         return get_terms(force=force, version=self.data_version)
+def _get_location_to_chr() -> dict[str, Reference]:
+    uri_prefix = "http://purl.obolibrary.org/obo/CHR_9606-chr"
+    graph: obographs.Graph = obographs.read(CHR_URL, squeeze=True)
+    rv = {}
+    for node in graph.nodes:
+        if node.id.startswith(uri_prefix):
+            identifier = node.id.removeprefix(uri_prefix)
+            rv[identifier] = Reference(
+                prefix="CHR", identifier=f"9606-chr{identifier}", name=node.lbl
+            )
+    return rv
 def get_terms(version: str | None = None, force: bool = False) -> Iterable[Term]:
     """Get HGNC terms."""
     if version is None:
         version = get_version("hgnc")
+    unhandled_locations: defaultdict[str, set[str]] = defaultdict(set)
+    location_to_chr = _get_location_to_chr()
     unhandled_entry_keys: typing.Counter[str] = Counter()
-    unhandle_locus_types: defaultdict[str, dict[str, Term]] = defaultdict(dict)
     path = ensure_path(
         PREFIX,
         url=DEFINITIONS_URL_FMT.format(version=version),
@@ -352,7 +359,6 @@ def get_terms(version: str | None = None, force: bool = False) -> Iterable[Term]
                 xref_identifiers = [str(xref_identifiers)]
             if xref_prefix == "merops.entry":
-                continue
                 # e.g., XM02-001 should be rewritten as XM02.001
                 xref_identifiers = [i.replace("-", ".") for i in xref_identifiers]
@@ -375,7 +381,7 @@ def get_terms(version: str | None = None, force: bool = False) -> Iterable[Term]
                     term.append_xref(Reference(prefix=xref_prefix, identifier=str(xref_identifier)))
         for pubmed_id in entry.pop("pubmed_id", []):
-            term.append_provenance(Reference(prefix="pubmed", identifier=str(pubmed_id)))
+            term.append_mentioned_by(Reference(prefix="pubmed", identifier=str(pubmed_id)))
         gene_group_ids = entry.pop("gene_group_id", [])
         gene_groups = entry.pop("gene_group", [])
@@ -390,34 +396,118 @@ def get_terms(version: str | None = None, force: bool = False) -> Iterable[Term]
             )
         for alias_symbol in entry.pop("alias_symbol", []):
-            term.append_synonym(alias_symbol, type=alias_symbol_type)
+            term.append_synonym(alias_symbol, type=gene_symbol_synonym)
         for alias_name in entry.pop("alias_name", []):
-            term.append_synonym(alias_name, type=alias_name_type)
+            # regular synonym, no type needed.
+            term.append_synonym(alias_name)
         for previous_symbol in itt.chain(
             entry.pop("previous_symbol", []), entry.pop("prev_symbol", [])
         ):
-            term.append_synonym(previous_symbol, type=previous_symbol_type)
-        for previous_name in entry.pop("prev_name", []):
-            term.append_synonym(previous_name, type=previous_name_type)
+            term.append_synonym(previous_symbol, type=previous_gene_symbol)
+        for previous_name_ in entry.pop("prev_name", []):
+            term.append_synonym(previous_name_, type=previous_name)
+        location: str | None = entry.pop("location", None)
+        if location is not None and location not in {
+            "not on reference assembly",
+            "unplaced",
+            "reserved",
+        }:
+            annotations = []
+            for qualifier_suffix, qualifier_text in QUALIFIERS.items():
+                if location.endswith(qualifier_suffix):
+                    location = location.removesuffix(qualifier_suffix)
+                    annotations.append(
+                        Annotation(
+                            predicate=comment.reference, value=OBOLiteral.string(qualifier_text)
+                        )
+                    )
+                    break
-        for prop, td in [("location", HAS_LOCATION)]:
-            value = entry.pop(prop, None)
-            if value:
-                term.annotate_string(td, value)
+            if location in location_to_chr:
+                term.append_relationship(
+                    located_in, location_to_chr[location], annotations=annotations
+                )
+            elif location == "mitochondria":
+                term.append_relationship(
+                    located_in,
+                    Reference(prefix="go", identifier="0000262", name="mitochondrial chromosome"),
+                    annotations=annotations,
+                )
+            elif " and " in location:
+                left, _, right = location.partition(" and ")
+                if left not in location_to_chr:
+                    unhandled_locations[left].add(identifier)
+                elif right not in location_to_chr:
+                    unhandled_locations[right].add(identifier)
+                elif left in location_to_chr and right in location_to_chr:
+                    term.append_relationship(
+                        located_in, location_to_chr[left], annotations=annotations
+                    )
+                    term.append_relationship(
+                        located_in, location_to_chr[right], annotations=annotations
+                    )
+                else:
+                    unhandled_locations[location].add(identifier)
+            elif " or " in location:
+                left, _, right = location.partition(" or ")
+                if left not in location_to_chr:
+                    unhandled_locations[left].add(identifier)
+                elif right not in location_to_chr:
+                    unhandled_locations[right].add(identifier)
+                elif left in location_to_chr and right in location_to_chr:
+                    # FIXME implement
+                    unhandled_locations[location].add(identifier)
+                else:
+                    unhandled_locations[location].add(identifier)
+            elif "-" in location:
+                start, _, end = location.partition("-")
+                # the range that starts with a q needs
+                # the chromosome moved over, like in
+                # 17q24.2-q24.3
+                if end.startswith("q"):
+                    chr, _, _ = start.partition("q")
+                    end = f"{chr}{end}"
+                # the range that starts with a p needs
+                # the chromosome moved over, like in
+                # 1p34.2-p34.1
+                elif end.startswith("p"):
+                    chr, _, _ = start.partition("p")
+                    end = f"{chr}{end}"
+                if start not in location_to_chr:
+                    unhandled_locations[start].add(identifier)
+                elif end not in location_to_chr:
+                    unhandled_locations[end].add(identifier)
+                elif start in location_to_chr and end in location_to_chr:
+                    term.append_relationship(
+                        starts, location_to_chr[start], annotations=annotations
+                    )
+                    term.append_relationship(ends, location_to_chr[end], annotations=annotations)
+                else:
+                    unhandled_locations[location].add(identifier)
+            else:
+                unhandled_locations[location].add(identifier)
         locus_type = entry.pop("locus_type")
-        locus_group = entry.pop("locus_group")
+        # note that locus group is a more broad category than locus type,
+        # and since we already have an exhaustive mapping from locus type
+        # to SO, then we can throw this annotation away
+        _locus_group = entry.pop("locus_group")
         so_id = LOCUS_TYPE_TO_SO.get(locus_type)
-        if so_id:
-            term.append_parent(Reference(prefix="SO", identifier=so_id, name=get_so_name(so_id)))
-        else:
-            term.append_parent(
-                Reference(prefix="SO", identifier="0000704", name=get_so_name("0000704"))
-            )  # gene
-            unhandle_locus_types[locus_type][identifier] = term
-            term.annotate_string(HAS_LOCUS_TYPE, locus_type)
-            term.annotate_string(HAS_LOCUS_GROUP, locus_group)
+        if not so_id:
+            raise ValueError("""\
+                HGNC has updated their list of locus types, so the HGNC script is currently
+                incomplete. This can be fixed by updating the ``LOCUS_TYPE_TO_SO`` dictionary
+                to point to a new SO term. If there is none existing, then make a pull request
+                to https://github.com/The-Sequence-Ontology/SO-Ontologies like in
+                https://github.com/The-Sequence-Ontology/SO-Ontologies/pull/668. If the
+                maintainers aren't responsive, you can still use the proposed term before it's
+                accepted upstream like was done for SO:0003001 and SO:0003002
+            """)
+        term.append_parent(Reference(prefix="SO", identifier=so_id, name=get_so_name(so_id)))
         term.set_species(identifier="9606", name="Homo sapiens")
         for key in entry:
@@ -425,45 +515,16 @@ def get_terms(version: str | None = None, force: bool = False) -> Iterable[Term]
                 unhandled_entry_keys[key] += 1
         yield term
-    with open(prefix_directory_join(PREFIX, name="unhandled.json"), "w") as file:
-        json.dump(
-            {
-                k: {hgnc_id: term.name for hgnc_id, term in v.items()}
-                for k, v in unhandle_locus_types.items()
-            },
-            file,
-            indent=2,
-        )
-    with open(prefix_directory_join(PREFIX, name="unhandled.md"), "w") as file:
-        for k, v in sorted(unhandle_locus_types.items()):
-            t = tabulate(
-                [
-                    (
-                        hgnc_id,
-                        term.name,
-                        term.is_obsolete,
-                        f"https://bioregistry.io/{term.curie}",
-                        ", ".join(
-                            f"https://bioregistry.io/{p.curie}"
-                            for p in term.provenance
-                            if isinstance(p, Reference)
-                        ),
-                    )
-                    for hgnc_id, term in sorted(v.items())
-                ],
-                headers=["hgnc_id", "name", "obsolete", "link", "provenance"],
+    if unhandled_locations:
+        logger.warning(
+            "Unhandled chromosomal locations:\n\n%s\n",
+            tabulate(
+                [(k, len(vs), f"HGNC:{min(vs)}") for k, vs in unhandled_locations.items()],
+                headers=["location", "count", "example"],
                 tablefmt="github",
-            )
-            print(f"## {k} ({len(v)})", file=file)
-            print(t, "\n", file=file)
+            ),
+        )
-    unhandle_locus_type_counter = Counter(
-        {locus_type: len(d) for locus_type, d in unhandle_locus_types.items()}
-    )
-    logger.warning(
-        "Unhandled locus types:\n%s", tabulate(unhandle_locus_type_counter.most_common())
-    )
     if unhandled_entry_keys:
         logger.warning("Unhandled keys:\n%s", tabulate(unhandled_entry_keys.most_common()))

pyobo 0.12.10__py3-none-any.whl → 0.12.12__py3-none-any.whl

pyobo 0.12.10__py3-none-any.whl → 0.12.12__py3-none-any.whl