pyobo 0.11.2__py3-none-any.whl → 0.12.1__py3-none-any.whl

This diff compares the contents of two package versions publicly released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
Files changed (228)
  1. pyobo/.DS_Store +0 -0
  2. pyobo/__init__.py +95 -20
  3. pyobo/__main__.py +0 -0
  4. pyobo/api/__init__.py +81 -10
  5. pyobo/api/alts.py +52 -42
  6. pyobo/api/combine.py +39 -0
  7. pyobo/api/edges.py +68 -0
  8. pyobo/api/hierarchy.py +231 -203
  9. pyobo/api/metadata.py +14 -19
  10. pyobo/api/names.py +207 -127
  11. pyobo/api/properties.py +117 -117
  12. pyobo/api/relations.py +68 -94
  13. pyobo/api/species.py +24 -21
  14. pyobo/api/typedefs.py +11 -11
  15. pyobo/api/utils.py +66 -13
  16. pyobo/api/xrefs.py +107 -114
  17. pyobo/cli/__init__.py +0 -0
  18. pyobo/cli/cli.py +35 -50
  19. pyobo/cli/database.py +210 -160
  20. pyobo/cli/database_utils.py +155 -0
  21. pyobo/cli/lookup.py +163 -195
  22. pyobo/cli/utils.py +19 -6
  23. pyobo/constants.py +102 -3
  24. pyobo/getters.py +209 -191
  25. pyobo/gilda_utils.py +52 -250
  26. pyobo/identifier_utils/__init__.py +33 -0
  27. pyobo/identifier_utils/api.py +305 -0
  28. pyobo/identifier_utils/preprocessing.json +873 -0
  29. pyobo/identifier_utils/preprocessing.py +27 -0
  30. pyobo/identifier_utils/relations/__init__.py +8 -0
  31. pyobo/identifier_utils/relations/api.py +162 -0
  32. pyobo/identifier_utils/relations/data.json +5824 -0
  33. pyobo/identifier_utils/relations/data_owl.json +57 -0
  34. pyobo/identifier_utils/relations/data_rdf.json +1 -0
  35. pyobo/identifier_utils/relations/data_rdfs.json +7 -0
  36. pyobo/mocks.py +9 -6
  37. pyobo/ner/__init__.py +9 -0
  38. pyobo/ner/api.py +72 -0
  39. pyobo/ner/normalizer.py +33 -0
  40. pyobo/obographs.py +48 -40
  41. pyobo/plugins.py +5 -4
  42. pyobo/py.typed +0 -0
  43. pyobo/reader.py +1354 -395
  44. pyobo/reader_utils.py +155 -0
  45. pyobo/resource_utils.py +42 -22
  46. pyobo/resources/__init__.py +0 -0
  47. pyobo/resources/goc.py +75 -0
  48. pyobo/resources/goc.tsv +188 -0
  49. pyobo/resources/ncbitaxon.py +4 -5
  50. pyobo/resources/ncbitaxon.tsv.gz +0 -0
  51. pyobo/resources/ro.py +3 -2
  52. pyobo/resources/ro.tsv +0 -0
  53. pyobo/resources/so.py +0 -0
  54. pyobo/resources/so.tsv +0 -0
  55. pyobo/sources/README.md +12 -8
  56. pyobo/sources/__init__.py +52 -29
  57. pyobo/sources/agrovoc.py +0 -0
  58. pyobo/sources/antibodyregistry.py +11 -12
  59. pyobo/sources/bigg/__init__.py +13 -0
  60. pyobo/sources/bigg/bigg_compartment.py +81 -0
  61. pyobo/sources/bigg/bigg_metabolite.py +229 -0
  62. pyobo/sources/bigg/bigg_model.py +46 -0
  63. pyobo/sources/bigg/bigg_reaction.py +77 -0
  64. pyobo/sources/biogrid.py +1 -2
  65. pyobo/sources/ccle.py +7 -12
  66. pyobo/sources/cgnc.py +9 -6
  67. pyobo/sources/chebi.py +1 -1
  68. pyobo/sources/chembl/__init__.py +9 -0
  69. pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
  70. pyobo/sources/chembl/chembl_target.py +160 -0
  71. pyobo/sources/civic_gene.py +55 -15
  72. pyobo/sources/clinicaltrials.py +160 -0
  73. pyobo/sources/complexportal.py +24 -24
  74. pyobo/sources/conso.py +14 -22
  75. pyobo/sources/cpt.py +0 -0
  76. pyobo/sources/credit.py +1 -9
  77. pyobo/sources/cvx.py +27 -5
  78. pyobo/sources/depmap.py +9 -12
  79. pyobo/sources/dictybase_gene.py +2 -7
  80. pyobo/sources/drugbank/__init__.py +9 -0
  81. pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
  82. pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
  83. pyobo/sources/drugcentral.py +17 -13
  84. pyobo/sources/expasy.py +31 -34
  85. pyobo/sources/famplex.py +13 -18
  86. pyobo/sources/flybase.py +8 -13
  87. pyobo/sources/gard.py +62 -0
  88. pyobo/sources/geonames/__init__.py +9 -0
  89. pyobo/sources/geonames/features.py +28 -0
  90. pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
  91. pyobo/sources/geonames/utils.py +115 -0
  92. pyobo/sources/gmt_utils.py +6 -7
  93. pyobo/sources/go.py +20 -13
  94. pyobo/sources/gtdb.py +154 -0
  95. pyobo/sources/gwascentral/__init__.py +9 -0
  96. pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
  97. pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
  98. pyobo/sources/hgnc/__init__.py +9 -0
  99. pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
  100. pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
  101. pyobo/sources/icd/__init__.py +9 -0
  102. pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
  103. pyobo/sources/icd/icd11.py +148 -0
  104. pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
  105. pyobo/sources/interpro.py +4 -9
  106. pyobo/sources/itis.py +0 -5
  107. pyobo/sources/kegg/__init__.py +0 -0
  108. pyobo/sources/kegg/api.py +16 -38
  109. pyobo/sources/kegg/genes.py +9 -20
  110. pyobo/sources/kegg/genome.py +1 -7
  111. pyobo/sources/kegg/pathway.py +9 -21
  112. pyobo/sources/mesh.py +58 -24
  113. pyobo/sources/mgi.py +3 -10
  114. pyobo/sources/mirbase/__init__.py +11 -0
  115. pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
  116. pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
  117. pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
  118. pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
  119. pyobo/sources/msigdb.py +74 -39
  120. pyobo/sources/ncbi/__init__.py +9 -0
  121. pyobo/sources/ncbi/ncbi_gc.py +162 -0
  122. pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
  123. pyobo/sources/nih_reporter.py +60 -0
  124. pyobo/sources/nlm/__init__.py +9 -0
  125. pyobo/sources/nlm/nlm_catalog.py +48 -0
  126. pyobo/sources/nlm/nlm_publisher.py +36 -0
  127. pyobo/sources/nlm/utils.py +116 -0
  128. pyobo/sources/npass.py +6 -8
  129. pyobo/sources/omim_ps.py +11 -4
  130. pyobo/sources/pathbank.py +4 -8
  131. pyobo/sources/pfam/__init__.py +9 -0
  132. pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
  133. pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
  134. pyobo/sources/pharmgkb/__init__.py +15 -0
  135. pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
  136. pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
  137. pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
  138. pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
  139. pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
  140. pyobo/sources/pharmgkb/utils.py +86 -0
  141. pyobo/sources/pid.py +1 -6
  142. pyobo/sources/pombase.py +6 -10
  143. pyobo/sources/pubchem.py +4 -9
  144. pyobo/sources/reactome.py +5 -11
  145. pyobo/sources/rgd.py +11 -16
  146. pyobo/sources/rhea.py +37 -36
  147. pyobo/sources/ror.py +69 -42
  148. pyobo/sources/selventa/__init__.py +0 -0
  149. pyobo/sources/selventa/schem.py +4 -7
  150. pyobo/sources/selventa/scomp.py +1 -6
  151. pyobo/sources/selventa/sdis.py +4 -7
  152. pyobo/sources/selventa/sfam.py +1 -6
  153. pyobo/sources/sgd.py +6 -11
  154. pyobo/sources/signor/__init__.py +7 -0
  155. pyobo/sources/signor/download.py +41 -0
  156. pyobo/sources/signor/signor_complexes.py +105 -0
  157. pyobo/sources/slm.py +12 -15
  158. pyobo/sources/umls/__init__.py +7 -1
  159. pyobo/sources/umls/__main__.py +0 -0
  160. pyobo/sources/umls/get_synonym_types.py +20 -4
  161. pyobo/sources/umls/sty.py +57 -0
  162. pyobo/sources/umls/synonym_types.tsv +1 -1
  163. pyobo/sources/umls/umls.py +18 -22
  164. pyobo/sources/unimod.py +46 -0
  165. pyobo/sources/uniprot/__init__.py +1 -1
  166. pyobo/sources/uniprot/uniprot.py +40 -32
  167. pyobo/sources/uniprot/uniprot_ptm.py +4 -34
  168. pyobo/sources/utils.py +3 -2
  169. pyobo/sources/wikipathways.py +7 -10
  170. pyobo/sources/zfin.py +5 -10
  171. pyobo/ssg/__init__.py +12 -16
  172. pyobo/ssg/base.html +0 -0
  173. pyobo/ssg/index.html +26 -13
  174. pyobo/ssg/term.html +12 -2
  175. pyobo/ssg/typedef.html +0 -0
  176. pyobo/struct/__init__.py +54 -8
  177. pyobo/struct/functional/__init__.py +1 -0
  178. pyobo/struct/functional/dsl.py +2572 -0
  179. pyobo/struct/functional/macros.py +423 -0
  180. pyobo/struct/functional/obo_to_functional.py +385 -0
  181. pyobo/struct/functional/ontology.py +272 -0
  182. pyobo/struct/functional/utils.py +112 -0
  183. pyobo/struct/reference.py +331 -136
  184. pyobo/struct/struct.py +1484 -657
  185. pyobo/struct/struct_utils.py +1078 -0
  186. pyobo/struct/typedef.py +162 -210
  187. pyobo/struct/utils.py +12 -5
  188. pyobo/struct/vocabulary.py +138 -0
  189. pyobo/utils/__init__.py +0 -0
  190. pyobo/utils/cache.py +16 -15
  191. pyobo/utils/io.py +51 -41
  192. pyobo/utils/iter.py +5 -5
  193. pyobo/utils/misc.py +41 -53
  194. pyobo/utils/ndex_utils.py +0 -0
  195. pyobo/utils/path.py +73 -70
  196. pyobo/version.py +3 -3
  197. pyobo-0.12.1.dist-info/METADATA +671 -0
  198. pyobo-0.12.1.dist-info/RECORD +201 -0
  199. pyobo-0.12.1.dist-info/WHEEL +4 -0
  200. {pyobo-0.11.2.dist-info → pyobo-0.12.1.dist-info}/entry_points.txt +1 -0
  201. pyobo-0.12.1.dist-info/licenses/LICENSE +21 -0
  202. pyobo/aws.py +0 -162
  203. pyobo/cli/aws.py +0 -47
  204. pyobo/identifier_utils.py +0 -142
  205. pyobo/normalizer.py +0 -232
  206. pyobo/registries/__init__.py +0 -16
  207. pyobo/registries/metaregistry.json +0 -507
  208. pyobo/registries/metaregistry.py +0 -135
  209. pyobo/sources/icd11.py +0 -105
  210. pyobo/xrefdb/__init__.py +0 -1
  211. pyobo/xrefdb/canonicalizer.py +0 -214
  212. pyobo/xrefdb/priority.py +0 -59
  213. pyobo/xrefdb/sources/__init__.py +0 -60
  214. pyobo/xrefdb/sources/biomappings.py +0 -36
  215. pyobo/xrefdb/sources/cbms2019.py +0 -91
  216. pyobo/xrefdb/sources/chembl.py +0 -83
  217. pyobo/xrefdb/sources/compath.py +0 -82
  218. pyobo/xrefdb/sources/famplex.py +0 -64
  219. pyobo/xrefdb/sources/gilda.py +0 -50
  220. pyobo/xrefdb/sources/intact.py +0 -113
  221. pyobo/xrefdb/sources/ncit.py +0 -133
  222. pyobo/xrefdb/sources/pubchem.py +0 -27
  223. pyobo/xrefdb/sources/wikidata.py +0 -116
  224. pyobo/xrefdb/xrefs_pipeline.py +0 -180
  225. pyobo-0.11.2.dist-info/METADATA +0 -711
  226. pyobo-0.11.2.dist-info/RECORD +0 -157
  227. pyobo-0.11.2.dist-info/WHEEL +0 -5
  228. pyobo-0.11.2.dist-info/top_level.txt +0 -1
pyobo/reader.py CHANGED
@@ -1,33 +1,59 @@
 """OBO Readers."""
 
+from __future__ import annotations
+
 import logging
+import typing as t
+from collections import Counter
 from collections.abc import Iterable, Mapping
 from datetime import datetime
+from io import StringIO
 from pathlib import Path
-from typing import Any, Optional, Union
+from textwrap import dedent
+from typing import Any
 
 import bioregistry
 import networkx as nx
+from curies import ReferenceTuple
+from curies.preprocessing import BlocklistError
+from curies.vocabulary import SynonymScope
 from more_itertools import pairwise
 from tqdm.auto import tqdm
 
 from .constants import DATE_FORMAT, PROVENANCE_PREFIXES
-from .identifier_utils import MissingPrefixError, normalize_curie
-from .registries import curie_has_blacklisted_prefix, curie_is_blacklisted, remap_prefix
+from .identifier_utils import (
+    NotCURIEError,
+    ParseError,
+    UnparsableIRIError,
+    _is_valid_identifier,
+    _parse_str_or_curie_or_uri_helper,
+    get_rules,
+)
+from .reader_utils import (
+    _chomp_axioms,
+    _chomp_references,
+    _chomp_specificity,
+    _chomp_typedef,
+    _parse_provenance_list,
+)
 from .struct import (
     Obo,
     Reference,
     Synonym,
-    SynonymSpecificities,
-    SynonymSpecificity,
     SynonymTypeDef,
     Term,
     TypeDef,
+    default_reference,
     make_ad_hoc_ontology,
 )
-from .struct.struct import DEFAULT_SYNONYM_TYPE
-from .struct.typedef import default_typedefs, develops_from, has_part, part_of
-from .utils.misc import cleanup_version
+from .struct import vocabulary as v
+from .struct.reference import OBOLiteral, _obo_parse_identifier
+from .struct.struct_utils import Annotation, Stanza
+from .struct.typedef import comment as has_comment
+from .struct.typedef import default_typedefs, has_ontology_root_term
+from .utils.cache import write_gzipped_graph
+from .utils.io import safe_open
+from .utils.misc import STATIC_VERSION_REWRITES, cleanup_version
 
 __all__ = [
     "from_obo_path",
@@ -36,369 +62,1026 @@ __all__ = [
36
62
 
37
63
  logger = logging.getLogger(__name__)
38
64
 
39
- # FIXME use bioontologies
40
- # RELATION_REMAPPINGS: Mapping[str, Tuple[str, str]] = bioontologies.upgrade.load()
41
- RELATION_REMAPPINGS: Mapping[str, tuple[str, str]] = {
42
- "part_of": part_of.pair,
43
- "has_part": has_part.pair,
44
- "develops_from": develops_from.pair,
45
- "seeAlso": ("rdf", "seeAlso"),
46
- "dc-contributor": ("dc", "contributor"),
47
- "dc-creator": ("dc", "creator"),
48
- }
49
-
50
65
 
51
66
  def from_obo_path(
52
- path: Union[str, Path], prefix: Optional[str] = None, *, strict: bool = True, **kwargs
67
+ path: str | Path,
68
+ prefix: str | None = None,
69
+ *,
70
+ strict: bool = False,
71
+ version: str | None,
72
+ upgrade: bool = True,
73
+ use_tqdm: bool = False,
74
+ ignore_obsolete: bool = False,
75
+ _cache_path: Path | None = None,
53
76
  ) -> Obo:
54
77
  """Get the OBO graph from a path."""
55
- import obonet
56
-
57
- logger.info("[%s] parsing with obonet from %s", prefix or "", path)
58
- with open(path) as file:
59
- graph = obonet.read_obo(
60
- tqdm(
61
- file,
62
- unit_scale=True,
63
- desc=f'[{prefix or ""}] parsing obo',
64
- disable=None,
65
- leave=False,
66
- )
67
- )
78
+ path = Path(path).expanduser().resolve()
79
+ if path.suffix.endswith(".zip"):
80
+ import io
81
+ import zipfile
82
+
83
+ logger.info("[%s] parsing zipped OBO with obonet from %s", prefix or "<unknown>", path)
84
+ with zipfile.ZipFile(path) as zf:
85
+ with zf.open(path.name.removesuffix(".zip"), "r") as file:
86
+ content = file.read().decode("utf-8")
87
+ graph = _read_obo(
88
+ io.StringIO(content), prefix, ignore_obsolete=ignore_obsolete, use_tqdm=use_tqdm
89
+ )
90
+ else:
91
+ logger.info("[%s] parsing OBO with obonet from %s", prefix or "<unknown>", path)
92
+ with safe_open(path, read=True) as file:
93
+ graph = _read_obo(file, prefix, ignore_obsolete=ignore_obsolete, use_tqdm=use_tqdm)
68
94
 
69
95
  if prefix:
70
96
  # Make sure the graph is named properly
71
97
  _clean_graph_ontology(graph, prefix)
72
98
 
99
+ if _cache_path:
100
+ logger.info("[%s] writing obonet cache to %s", prefix, _cache_path)
101
+ write_gzipped_graph(path=_cache_path, graph=graph)
102
+
73
103
  # Convert to an Obo instance and return
74
- return from_obonet(graph, strict=strict, **kwargs)
104
+ return from_obonet(graph, strict=strict, version=version, upgrade=upgrade, use_tqdm=use_tqdm)
105
+
106
+
107
+ def _read_obo(
108
+ filelike, prefix: str | None, ignore_obsolete: bool, use_tqdm: bool = True
109
+ ) -> nx.MultiDiGraph:
110
+ import obonet
111
+
112
+ return obonet.read_obo(
113
+ tqdm(
114
+ filelike,
115
+ unit_scale=True,
116
+ desc=f"[{prefix or ''}] parsing OBO",
117
+ disable=not use_tqdm,
118
+ leave=True,
119
+ ),
120
+ ignore_obsolete=ignore_obsolete,
121
+ )
75
122
 
76
123
 
77
- def from_obonet(graph: nx.MultiDiGraph, *, strict: bool = True) -> "Obo":
124
+ def _normalize_prefix_strict(prefix: str) -> str:
125
+ n = bioregistry.normalize_prefix(prefix)
126
+ if n is None:
127
+ raise ValueError(f"unknown prefix: {prefix}")
128
+ return n
129
+
130
+
131
+ def from_str(
132
+ text: str,
133
+ *,
134
+ strict: bool = False,
135
+ version: str | None = None,
136
+ upgrade: bool = True,
137
+ ignore_obsolete: bool = False,
138
+ use_tqdm: bool = False,
139
+ ) -> Obo:
140
+ """Read an ontology from a string representation."""
141
+ import obonet
142
+
143
+ text = dedent(text).strip()
144
+ io = StringIO()
145
+ io.write(text)
146
+ io.seek(0)
147
+ graph = obonet.read_obo(io, ignore_obsolete=ignore_obsolete)
148
+ return from_obonet(graph, strict=strict, version=version, upgrade=upgrade, use_tqdm=use_tqdm)
149
+
150
+
151
+ def from_obonet(
152
+ graph: nx.MultiDiGraph,
153
+ *,
154
+ strict: bool = False,
155
+ version: str | None = None,
156
+ upgrade: bool = True,
157
+ use_tqdm: bool = False,
158
+ ) -> Obo:
78
159
  """Get all of the terms from a OBO graph."""
79
- _ontology = graph.graph["ontology"]
80
- ontology = bioregistry.normalize_prefix(_ontology) # probably always okay
81
- if ontology is None:
82
- raise ValueError(f"unknown prefix: {_ontology}")
83
- logger.info("[%s] extracting OBO using obonet", ontology)
84
-
85
- date = _get_date(graph=graph, ontology=ontology)
86
- name = _get_name(graph=graph, ontology=ontology)
87
-
88
- data_version = graph.graph.get("data-version")
89
- if not data_version:
90
- if date is not None:
91
- data_version = date.strftime("%Y-%m-%d")
92
- logger.info(
93
- "[%s] does not report a version. falling back to date: %s",
94
- ontology,
95
- data_version,
96
- )
97
- else:
98
- logger.warning("[%s] does not report a version nor a date", ontology)
99
- else:
100
- data_version = cleanup_version(data_version=data_version, prefix=ontology)
101
- if data_version is not None:
102
- logger.info("[%s] using version %s", ontology, data_version)
103
- elif date is not None:
104
- logger.info(
105
- "[%s] unrecognized version format, falling back to date: %s",
106
- ontology,
107
- data_version,
108
- )
109
- data_version = date.strftime("%Y-%m-%d")
110
- else:
111
- logger.warning(
112
- "[%s] UNRECOGNIZED VERSION FORMAT AND MISSING DATE: %s", ontology, data_version
113
- )
160
+ ontology_prefix_raw = graph.graph["ontology"]
161
+ ontology_prefix = _normalize_prefix_strict(ontology_prefix_raw)
162
+ logger.info("[%s] extracting OBO using obonet", ontology_prefix)
163
+
164
+ date = _get_date(graph=graph, ontology_prefix=ontology_prefix)
165
+ name = _get_name(graph=graph, ontology_prefix=ontology_prefix)
166
+ imports = graph.graph.get("import")
114
167
 
168
+ macro_config = MacroConfig(graph.graph, strict=strict, ontology_prefix=ontology_prefix)
169
+
170
+ data_version = _clean_graph_version(
171
+ graph, ontology_prefix=ontology_prefix, version=version, date=date
172
+ )
115
173
  if data_version and "/" in data_version:
116
- raise ValueError(f"[{ontology}] will not accept slash in data version: {data_version}")
117
-
118
- #: Parsed CURIEs to references (even external ones)
119
- reference_it = (
120
- Reference(
121
- prefix=prefix,
122
- identifier=bioregistry.standardize_identifier(prefix, identifier),
123
- # if name isn't available, it means its external to this ontology
124
- name=data.get("name"),
174
+ raise ValueError(
175
+ f"[{ontology_prefix}] slashes not allowed in data versions because of filesystem usage: {data_version}"
125
176
  )
126
- for prefix, identifier, data in _iter_obo_graph(graph=graph, strict=strict)
127
- )
128
- references: Mapping[tuple[str, str], Reference] = {
129
- reference.pair: reference for reference in reference_it
130
- }
177
+
178
+ missing_typedefs: set[ReferenceTuple] = set()
179
+
180
+ subset_typedefs = _get_subsetdefs(graph.graph, ontology_prefix=ontology_prefix)
181
+
182
+ root_terms: list[Reference] = []
183
+ property_values: list[Annotation] = []
184
+ for ann in iterate_node_properties(
185
+ graph.graph,
186
+ ontology_prefix=ontology_prefix,
187
+ upgrade=upgrade,
188
+ node=Reference(prefix="obo", identifier=ontology_prefix),
189
+ strict=strict,
190
+ context="graph property",
191
+ ):
192
+ if ann.predicate.pair == has_ontology_root_term.pair:
193
+ match ann.value:
194
+ case OBOLiteral():
195
+ logger.warning(
196
+ "[%s] tried to use a literal as an ontology root: %s",
197
+ ontology_prefix,
198
+ ann.value.value,
199
+ )
200
+ continue
201
+ case Reference():
202
+ root_terms.append(ann.value)
203
+ else:
204
+ property_values.append(ann)
205
+
206
+ for remark in graph.graph.get("remark", []):
207
+ property_values.append(Annotation(has_comment.reference, OBOLiteral.string(remark)))
208
+
209
+ idspaces: dict[str, str] = {}
210
+ for x in graph.graph.get("idspace", []):
211
+ prefix, uri_prefix, *_ = (y.strip() for y in x.split(" ", 2))
212
+ idspaces[prefix] = uri_prefix
131
213
 
132
214
  #: CURIEs to typedefs
133
- typedefs: Mapping[tuple[str, str], TypeDef] = {
134
- typedef.pair: typedef for typedef in iterate_graph_typedefs(graph, ontology)
215
+ typedefs: Mapping[ReferenceTuple, TypeDef] = {
216
+ typedef.pair: typedef
217
+ for typedef in iterate_typedefs(
218
+ graph,
219
+ ontology_prefix=ontology_prefix,
220
+ strict=strict,
221
+ upgrade=upgrade,
222
+ macro_config=macro_config,
223
+ )
135
224
  }
136
225
 
137
- synonym_typedefs: Mapping[str, SynonymTypeDef] = {
138
- synonym_typedef.curie: synonym_typedef
139
- for synonym_typedef in iterate_graph_synonym_typedefs(graph, ontology=ontology)
226
+ synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef] = {
227
+ synonym_typedef.pair: synonym_typedef
228
+ for synonym_typedef in iterate_graph_synonym_typedefs(
229
+ graph,
230
+ ontology_prefix=ontology_prefix,
231
+ strict=strict,
232
+ upgrade=upgrade,
233
+ )
140
234
  }
141
235
 
142
- missing_typedefs = set()
236
+ terms = _get_terms(
237
+ graph,
238
+ strict=strict,
239
+ ontology_prefix=ontology_prefix,
240
+ upgrade=upgrade,
241
+ typedefs=typedefs,
242
+ missing_typedefs=missing_typedefs,
243
+ synonym_typedefs=synonym_typedefs,
244
+ subset_typedefs=subset_typedefs,
245
+ macro_config=macro_config,
246
+ use_tqdm=use_tqdm,
247
+ )
248
+
249
+ return make_ad_hoc_ontology(
250
+ _ontology=ontology_prefix,
251
+ _name=name,
252
+ _auto_generated_by=graph.graph.get("auto-generated-by"),
253
+ _typedefs=list(typedefs.values()),
254
+ _synonym_typedefs=list(synonym_typedefs.values()),
255
+ _date=date,
256
+ _data_version=data_version,
257
+ _root_terms=root_terms,
258
+ terms=terms,
259
+ _property_values=property_values,
260
+ _subsetdefs=subset_typedefs,
261
+ _imports=imports,
262
+ _idspaces=idspaces,
263
+ )
264
+
265
+
266
+ def _get_terms(
267
+ graph,
268
+ *,
269
+ strict: bool,
270
+ ontology_prefix: str,
271
+ upgrade: bool,
272
+ typedefs: Mapping[ReferenceTuple, TypeDef],
273
+ synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef],
274
+ subset_typedefs,
275
+ missing_typedefs: set[ReferenceTuple],
276
+ macro_config: MacroConfig,
277
+ use_tqdm: bool = False,
278
+ ) -> list[Term]:
143
279
  terms = []
144
- n_alt_ids, n_parents, n_synonyms, n_relations, n_properties, n_xrefs = 0, 0, 0, 0, 0, 0
145
- for prefix, identifier, data in _iter_obo_graph(graph=graph, strict=strict):
146
- if prefix != ontology or not data:
280
+ for reference, data in _iter_obo_graph(
281
+ graph=graph,
282
+ strict=strict,
283
+ ontology_prefix=ontology_prefix,
284
+ use_tqdm=use_tqdm,
285
+ upgrade=upgrade,
286
+ ):
287
+ if reference.prefix != ontology_prefix:
288
+ continue
289
+ if not data:
290
+ # this allows us to skip anything that isn't really defined
291
+ # caveat: this misses terms that are just defined with an ID
147
292
  continue
148
293
 
149
- identifier = bioregistry.standardize_identifier(prefix, identifier)
150
- reference = references[ontology, identifier]
151
-
152
- try:
153
- node_xrefs = list(iterate_node_xrefs(prefix=prefix, data=data, strict=strict))
154
- except MissingPrefixError as e:
155
- e.reference = reference
156
- raise e
157
- xrefs, provenance = [], []
158
- for node_xref in node_xrefs:
159
- if node_xref.prefix in PROVENANCE_PREFIXES:
160
- provenance.append(node_xref)
161
- else:
162
- xrefs.append(node_xref)
163
- n_xrefs += len(xrefs)
294
+ term = Term(
295
+ reference=reference,
296
+ builtin=_get_boolean(data, "builtin"),
297
+ is_anonymous=_get_boolean(data, "is_anonymous"),
298
+ is_obsolete=_get_boolean(data, "is_obsolete"),
299
+ namespace=data.get("namespace"),
300
+ )
164
301
 
165
- definition, definition_references = get_definition(
166
- data, prefix=prefix, identifier=identifier
302
+ _process_alts(term, data, ontology_prefix=ontology_prefix, strict=strict)
303
+ _process_parents(term, data, ontology_prefix=ontology_prefix, strict=strict)
304
+ _process_synonyms(
305
+ term,
306
+ data,
307
+ ontology_prefix=ontology_prefix,
308
+ strict=strict,
309
+ upgrade=upgrade,
310
+ synonym_typedefs=synonym_typedefs,
311
+ )
312
+ _process_xrefs(
313
+ term,
314
+ data,
315
+ ontology_prefix=ontology_prefix,
316
+ strict=strict,
317
+ macro_config=macro_config,
318
+ upgrade=upgrade,
319
+ )
320
+ _process_properties(
321
+ term,
322
+ data,
323
+ ontology_prefix=ontology_prefix,
324
+ strict=strict,
325
+ upgrade=upgrade,
326
+ typedefs=typedefs,
167
327
  )
168
- if definition_references:
169
- provenance.extend(definition_references)
328
+ _process_relations(
329
+ term,
330
+ data,
331
+ ontology_prefix=ontology_prefix,
332
+ strict=strict,
333
+ upgrade=upgrade,
334
+ typedefs=typedefs,
335
+ missing_typedefs=missing_typedefs,
336
+ )
337
+ _process_replaced_by(term, data, ontology_prefix=ontology_prefix, strict=strict)
338
+ _process_subsets(term, data, ontology_prefix=ontology_prefix, strict=strict)
339
+ _process_intersection_of(term, data, ontology_prefix=ontology_prefix, strict=strict)
340
+ _process_union_of(term, data, ontology_prefix=ontology_prefix, strict=strict)
341
+ _process_equivalent_to(term, data, ontology_prefix=ontology_prefix, strict=strict)
342
+ _process_disjoint_from(term, data, ontology_prefix=ontology_prefix, strict=strict)
343
+ _process_consider(term, data, ontology_prefix=ontology_prefix, strict=strict)
344
+ _process_comment(term, data, ontology_prefix=ontology_prefix, strict=strict)
345
+ _process_description(term, data, ontology_prefix=ontology_prefix, strict=strict)
346
+ _process_creation_date(term, data)
170
347
 
171
- try:
172
- alt_ids = list(iterate_node_alt_ids(data, strict=strict))
173
- except MissingPrefixError as e:
174
- e.reference = reference
175
- raise e
176
- n_alt_ids += len(alt_ids)
348
+ terms.append(term)
349
+ return terms
177
350
 
178
- try:
179
- parents = list(
180
- iterate_node_parents(
181
- data,
182
- prefix=prefix,
183
- identifier=identifier,
184
- strict=strict,
185
- )
186
- )
187
- except MissingPrefixError as e:
188
- e.reference = reference
189
- raise e
190
- n_parents += len(parents)
191
351
 
192
- synonyms = list(
193
- iterate_node_synonyms(
194
- data,
195
- synonym_typedefs,
196
- prefix=prefix,
197
- identifier=identifier,
198
- strict=strict,
352
+ def _process_description(term: Stanza, data, *, ontology_prefix: str, strict: bool):
353
+ definition, definition_references = get_definition(
354
+ data, node=term.reference, strict=strict, ontology_prefix=ontology_prefix
355
+ )
356
+ term.definition = definition
357
+ if term.definition:
358
+ for definition_reference in definition_references:
359
+ term._append_annotation(
360
+ v.has_description,
361
+ OBOLiteral.string(term.definition),
362
+ Annotation(v.has_dbxref, definition_reference),
199
363
  )
364
+
365
+
366
+ def _process_comment(term: Stanza, data, *, ontology_prefix: str, strict: bool) -> None:
367
+ if comment := data.get("comment"):
368
+ term.append_comment(comment)
369
+
370
+
371
+ def _process_creation_date(term: Stanza, data) -> None:
372
+ date_str = data.get("creation_date")
373
+ if not date_str:
374
+ return
375
+ if isinstance(date_str, list):
376
+ date_str = date_str[0]
377
+ try:
378
+ term.append_creation_date(date_str)
379
+ except ValueError:
380
+ logger.warning("[%s] failed to parse creation_date: %s", term.reference.curie, date_str)
381
+
382
+
383
+ def _process_union_of(term: Stanza, data, *, ontology_prefix: str, strict: bool) -> None:
384
+ for reference in iterate_node_reference_tag(
385
+ "union_of", data=data, ontology_prefix=ontology_prefix, strict=strict, node=term.reference
386
+ ):
387
+ term.append_union_of(reference)
388
+
389
+
390
+ def _process_equivalent_to(term: Stanza, data, *, ontology_prefix: str, strict: bool) -> None:
391
+ for reference in iterate_node_reference_tag(
392
+ "equivalent_to",
393
+ data=data,
394
+ ontology_prefix=ontology_prefix,
395
+ strict=strict,
396
+ node=term.reference,
397
+ ):
398
+ term.append_equivalent_to(reference)
399
+
400
+
401
+ def _process_disjoint_from(term: Stanza, data, *, ontology_prefix: str, strict: bool) -> None:
402
+ for reference in iterate_node_reference_tag(
403
+ "disjoint_from",
404
+ data=data,
405
+ ontology_prefix=ontology_prefix,
406
+ strict=strict,
407
+ node=term.reference,
408
+ ):
409
+ term.append_disjoint_from(reference)
410
+
411
+
412
+ def _process_alts(term: Stanza, data, *, ontology_prefix: str, strict: bool) -> None:
413
+ for alt_reference in iterate_node_reference_tag(
414
+ "alt_id", data, node=term.reference, strict=strict, ontology_prefix=ontology_prefix
415
+ ):
416
+ term.append_alt(alt_reference)
417
+
418
+
419
+ def _process_parents(term: Stanza, data, *, ontology_prefix: str, strict: bool) -> None:
420
+ for tag in ["is_a", "instance_of"]:
421
+ for parent in iterate_node_reference_tag(
422
+ tag, data, node=term.reference, strict=strict, ontology_prefix=ontology_prefix
423
+ ):
424
+ term.append_parent(parent)
425
+
426
+
427
+ def _process_synonyms(
428
+ term: Stanza,
429
+ data,
430
+ *,
431
+ ontology_prefix: str,
432
+ strict: bool,
433
+ upgrade: bool,
434
+ synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef],
435
+ ) -> None:
436
+ synonyms = list(
437
+ iterate_node_synonyms(
438
+ data,
439
+ synonym_typedefs,
440
+ node=term.reference,
441
+ strict=strict,
442
+ ontology_prefix=ontology_prefix,
443
+ upgrade=upgrade,
200
444
  )
201
- n_synonyms += len(synonyms)
445
+ )
446
+ for synonym in synonyms:
447
+ term.append_synonym(synonym)
202
448
 
203
- term = Term(
204
- reference=reference,
205
- definition=definition,
206
- parents=parents,
207
- synonyms=synonyms,
208
- xrefs=xrefs,
209
- provenance=provenance,
210
- alt_ids=alt_ids,
449
+
450
+ def _process_xrefs(
451
+ term: Stanza,
452
+ data,
453
+ *,
454
+ ontology_prefix: str,
455
+ strict: bool,
456
+ macro_config: MacroConfig,
457
+ upgrade: bool,
458
+ ) -> None:
459
+ for reference, provenance in iterate_node_xrefs(
460
+ data=data,
461
+ strict=strict,
462
+ ontology_prefix=ontology_prefix,
463
+ node=term.reference,
464
+ upgrade=upgrade,
465
+ ):
466
+ _handle_xref(term, reference, provenance=provenance, macro_config=macro_config)
467
+
468
+
469
+ def _process_properties(
470
+ term: Stanza, data, *, ontology_prefix: str, strict: bool, upgrade: bool, typedefs
471
+ ) -> None:
472
+ for ann in iterate_node_properties(
473
+ data,
474
+ node=term.reference,
475
+ strict=strict,
476
+ ontology_prefix=ontology_prefix,
477
+ upgrade=upgrade,
478
+ context="stanza property",
479
+ ):
480
+ # TODO parse axioms
481
+ term.append_property(ann)
482
+
483
+
484
+ def _process_relations(
485
+ term: Stanza,
486
+ data,
487
+ *,
488
+ ontology_prefix: str,
489
+ strict: bool,
490
+ upgrade: bool,
491
+ typedefs: Mapping[ReferenceTuple, TypeDef],
492
+ missing_typedefs: set[ReferenceTuple],
493
+ ) -> None:
494
+ relations_references = list(
495
+ iterate_node_relationships(
496
+ data,
497
+ node=term.reference,
498
+ strict=strict,
499
+ ontology_prefix=ontology_prefix,
500
+ upgrade=upgrade,
211
501
  )
502
+ )
503
+ for relation, reference in relations_references:
504
+ if (
505
+ relation.pair not in typedefs
506
+ and relation.pair not in default_typedefs
507
+ and relation.pair not in missing_typedefs
508
+ ):
509
+ missing_typedefs.add(relation.pair)
510
+ logger.warning("[%s] has no typedef for %s", ontology_prefix, relation.curie)
511
+ logger.debug("[%s] available typedefs: %s", ontology_prefix, set(typedefs))
512
+ # TODO parse axioms
513
+ term.append_relationship(relation, reference)
514
+
515
+
516
+ def _process_replaced_by(stanza: Stanza, data, *, ontology_prefix: str, strict: bool) -> None:
517
+ for reference in iterate_node_reference_tag(
518
+ "replaced_by", data, node=stanza.reference, strict=strict, ontology_prefix=ontology_prefix
519
+ ):
520
+ stanza.append_replaced_by(reference)
521
+
522
+
523
+ def _process_subsets(stanza: Stanza, data, *, ontology_prefix: str, strict: bool) -> None:
524
+ for reference in iterate_node_reference_tag(
525
+ "subset",
526
+ data,
527
+ node=stanza.reference,
528
+ strict=strict,
529
+ ontology_prefix=ontology_prefix,
530
+ counter=SUBSET_ERROR_COUNTER,
531
+ ):
532
+ stanza.append_subset(reference)
533
+
534
+
535
+ def _get_boolean(data: Mapping[str, Any], tag: str) -> bool | None:
536
+ value = data.get(tag)
537
+ if value is None:
538
+ return None
539
+ if isinstance(value, list):
540
+ value = value[0]
541
+ if value == "false":
542
+ return False
543
+ if value == "true":
544
+ return True
545
+ raise ValueError(value)
546
+
547
+
548
+ def _get_reference(
549
+ data: Mapping[str, Any], tag: str, *, ontology_prefix: str, strict: bool, **kwargs
550
+ ) -> Reference | None:
551
+ value = data.get(tag)
552
+ if value is None:
553
+ return None
554
+ if isinstance(value, list):
555
+ value = value[0]
556
+ return _obo_parse_identifier(
557
+ value, ontology_prefix=ontology_prefix, strict=strict, context=tag, **kwargs
558
+ )
212
559
 
213
- try:
214
- relations_references = list(
215
- iterate_node_relationships(
216
- data,
217
- prefix=ontology,
218
- identifier=identifier,
219
- strict=strict,
560
+
561
+ class MacroConfig:
562
+ """A configuration data class for reader macros."""
563
+
564
+ def __init__(
565
+ self, data: Mapping[str, list[str]] | None = None, *, strict: bool, ontology_prefix: str
566
+ ):
567
+ """Instantiate the configuration from obonet graph metadata."""
568
+ if data is None:
569
+ data = {}
570
+
571
+ self.treat_xrefs_as_equivalent: set[str] = set()
572
+ for prefix in data.get("treat-xrefs-as-equivalent", []):
573
+ prefix_norm = bioregistry.normalize_prefix(prefix)
574
+ if prefix_norm is None:
575
+ continue
576
+ self.treat_xrefs_as_equivalent.add(prefix_norm)
577
+
578
+ self.treat_xrefs_as_genus_differentia: dict[str, tuple[Reference, Reference]] = {}
579
+ for line in data.get("treat-xrefs-as-genus-differentia", []):
580
+ try:
581
+ gd_prefix, gd_predicate, gd_target = line.split()
582
+ except ValueError:
583
+ # this happens in `plana`, where there's an incorrectly written
584
+ # line `CARO part_of NCBITaxon:79327; CL part_of NCBITaxon:79327`
585
+ tqdm.write(
586
+ f"[{ontology_prefix}] failed to parse treat-xrefs-as-genus-differentia: {line}"
220
587
  )
588
+ continue
589
+
590
+ gd_prefix_norm = bioregistry.normalize_prefix(gd_prefix)
591
+ if gd_prefix_norm is None:
592
+ continue
593
+ gd_predicate_re = _obo_parse_identifier(
594
+ gd_predicate, ontology_prefix=ontology_prefix, strict=strict
221
595
  )
222
- except MissingPrefixError as e:
223
- e.reference = reference
224
- raise e
225
- for relation, reference in relations_references:
226
- if (relation.prefix, relation.identifier) in typedefs:
227
- typedef = typedefs[relation.prefix, relation.identifier]
228
- elif (relation.prefix, relation.identifier) in default_typedefs:
229
- typedef = default_typedefs[relation.prefix, relation.identifier]
230
- else:
231
- if (relation.prefix, relation.identifier) not in missing_typedefs:
232
- missing_typedefs.add((relation.prefix, relation.identifier))
233
- logger.warning("[%s] has no typedef for %s", ontology, relation)
234
- logger.debug("[%s] available typedefs: %s", ontology, set(typedefs))
596
+ if gd_predicate_re is None:
597
+ continue
598
+ gd_target_re = _obo_parse_identifier(
599
+ gd_target, ontology_prefix=ontology_prefix, strict=strict
600
+ )
601
+ if gd_target_re is None:
602
+ continue
603
+ self.treat_xrefs_as_genus_differentia[gd_prefix_norm] = (gd_predicate_re, gd_target_re)
604
+
605
+ self.treat_xrefs_as_relationship: dict[str, Reference] = {}
606
+ for line in data.get("treat-xrefs-as-relationship", []):
607
+ try:
608
+ gd_prefix, gd_predicate = line.split()
609
+ except ValueError:
610
+ tqdm.write(
611
+ f"[{ontology_prefix}] failed to parse treat-xrefs-as-relationship: {line}"
612
+ )
235
613
  continue
236
- n_relations += 1
237
- term.append_relationship(typedef, reference)
238
- for prop, value in iterate_node_properties(data, term=term):
239
- n_properties += 1
240
- term.append_property(prop, value)
241
- terms.append(term)
242
614
 
243
- logger.info(
244
- f"[{ontology}] got {len(references):,} references, {len(typedefs):,} typedefs, {len(terms):,} terms,"
245
- f" {n_alt_ids:,} alt ids, {n_parents:,} parents, {n_synonyms:,} synonyms, {n_xrefs:,} xrefs,"
246
- f" {n_relations:,} relations, and {n_properties:,} properties",
247
- )
615
+ gd_prefix_norm = bioregistry.normalize_prefix(gd_prefix)
616
+ if gd_prefix_norm is None:
617
+ continue
618
+ gd_predicate_re = _obo_parse_identifier(
619
+ gd_predicate, ontology_prefix=ontology_prefix, strict=strict
620
+ )
621
+ if gd_predicate_re is None:
622
+ continue
623
+ self.treat_xrefs_as_relationship[gd_prefix_norm] = gd_predicate_re
248
624
 
249
- return make_ad_hoc_ontology(
250
- _ontology=ontology,
251
- _name=name,
252
- _auto_generated_by=graph.graph.get("auto-generated-by"),
253
- _format_version=graph.graph.get("format-version"),
254
- _typedefs=list(typedefs.values()),
255
- _synonym_typedefs=list(synonym_typedefs.values()),
256
- _date=date,
257
- _data_version=data_version,
258
- terms=terms,
259
- )
625
+ self.treat_xrefs_as_is_a: set[str] = set()
626
+ for prefix in data.get("treat-xrefs-as-is_a", []):
627
+ gd_prefix_norm = bioregistry.normalize_prefix(prefix)
628
+ if gd_prefix_norm is None:
629
+ continue
630
+ self.treat_xrefs_as_is_a.add(gd_prefix_norm)
631
+
632
+
633
+ def _handle_xref(
634
+ term: Stanza,
635
+ xref: Reference,
636
+ *,
637
+ provenance: list[Reference | OBOLiteral],
638
+ macro_config: MacroConfig | None = None,
639
+ ) -> Stanza:
640
+ annotations = [Annotation(v.has_dbxref, p) for p in provenance]
641
+
642
+ if macro_config is not None:
643
+ if xref.prefix in macro_config.treat_xrefs_as_equivalent:
644
+ return term.append_equivalent(xref, annotations=annotations)
645
+ elif object_property := macro_config.treat_xrefs_as_genus_differentia.get(xref.prefix):
646
+ # TODO how to add annotations here?
647
+ if annotations:
648
+ logger.warning(
649
+ "[%s] unable to add provenance to xref upgraded to intersection_of: %s",
650
+ term.reference.curie,
651
+ xref,
652
+ )
653
+ return term.append_intersection_of(xref).append_intersection_of(object_property)
654
+ elif predicate := macro_config.treat_xrefs_as_relationship.get(xref.prefix):
655
+ return term.append_relationship(predicate, xref, annotations=annotations)
656
+ elif xref.prefix in macro_config.treat_xrefs_as_is_a:
657
+ return term.append_parent(xref, annotations=annotations)
658
+
659
+ # TODO this is not what spec calls for, maybe
660
+ # need a flag in macro config for this
661
+ if xref.prefix in PROVENANCE_PREFIXES:
662
+ return term.append_provenance(xref, annotations=annotations)
663
+
664
+ return term.append_xref(xref, annotations=annotations)
665
+
666
+
667
+ SUBSET_ERROR_COUNTER: Counter[tuple[str, str]] = Counter()
668
+
669
+
670
+ def _get_subsetdefs(graph: nx.MultiDiGraph, ontology_prefix: str) -> list[tuple[Reference, str]]:
671
+ rv = []
672
+ for subsetdef in graph.get("subsetdef", []):
673
+ left, _, right = subsetdef.partition(" ")
674
+ if not right:
675
+ logger.warning("[%s] subsetdef did not have two parts", ontology_prefix, subsetdef)
676
+ continue
677
+ left_ref = _obo_parse_identifier(
678
+ left,
679
+ ontology_prefix=ontology_prefix,
680
+ name=right,
681
+ line=subsetdef,
682
+ counter=SUBSET_ERROR_COUNTER,
683
+ )
684
+ if left_ref is None:
685
+ continue
686
+ right = right.strip('"')
687
+ rv.append((left_ref, right))
688
+ return rv
260
689
 
261
690
 
262
691
  def _clean_graph_ontology(graph, prefix: str) -> None:
263
692
  """Update the ontology entry in the graph's metadata, if necessary."""
264
693
  if "ontology" not in graph.graph:
265
- logger.warning('[%s] missing "ontology" key', prefix)
694
+ logger.debug('[%s] missing "ontology" key', prefix)
266
695
  graph.graph["ontology"] = prefix
267
696
  elif not graph.graph["ontology"].isalpha():
268
- logger.warning(
269
- "[%s] ontology=%s has a strange format. replacing with prefix",
697
+ logger.debug(
698
+ "[%s] ontology prefix `%s` has a strange format. replacing with prefix",
270
699
  prefix,
271
700
  graph.graph["ontology"],
272
701
  )
273
702
  graph.graph["ontology"] = prefix
274
703
 
275
704
 
705
+ def _clean_graph_version(
706
+ graph, ontology_prefix: str, version: str | None, date: datetime | None
707
+ ) -> str | None:
708
+ if ontology_prefix in STATIC_VERSION_REWRITES:
709
+ return STATIC_VERSION_REWRITES[ontology_prefix]
710
+
711
+ data_version: str | None = graph.graph.get("data-version") or None
712
+ if version:
713
+ clean_injected_version = cleanup_version(version, prefix=ontology_prefix)
714
+ if not data_version:
715
+ logger.debug(
716
+ "[%s] did not have a version, overriding with %s",
717
+ ontology_prefix,
718
+ clean_injected_version,
719
+ )
720
+ return clean_injected_version
721
+
722
+ clean_data_version = cleanup_version(data_version, prefix=ontology_prefix)
723
+ if clean_data_version != clean_injected_version:
724
+ # in this case, we're going to trust the one that's passed
725
+ # through explicitly more than the graph's content
726
+ logger.debug(
727
+ "[%s] had version %s, overriding with %s", ontology_prefix, data_version, version
728
+ )
729
+ return clean_injected_version
730
+
731
+ if data_version:
732
+ clean_data_version = cleanup_version(data_version, prefix=ontology_prefix)
733
+ logger.debug("[%s] using version %s", ontology_prefix, clean_data_version)
734
+ return clean_data_version
735
+
736
+ if date is not None:
737
+ derived_date_version = date.strftime("%Y-%m-%d")
738
+ logger.debug(
739
+ "[%s] does not report a version. falling back to date: %s",
740
+ ontology_prefix,
741
+ derived_date_version,
742
+ )
743
+ return derived_date_version
744
+
745
+ logger.debug("[%s] does not report a version nor a date", ontology_prefix)
746
+ return None
747
+
748
+
276
749
  def _iter_obo_graph(
277
750
  graph: nx.MultiDiGraph,
278
751
  *,
279
- strict: bool = True,
280
- ) -> Iterable[tuple[str, str, Mapping[str, Any]]]:
752
+ strict: bool = False,
753
+ ontology_prefix: str,
754
+ use_tqdm: bool = False,
755
+ upgrade: bool,
756
+ ) -> Iterable[tuple[Reference, Mapping[str, Any]]]:
281
757
  """Iterate over the nodes in the graph with the prefix stripped (if it's there)."""
282
- for node, data in graph.nodes(data=True):
283
- prefix, identifier = normalize_curie(node, strict=strict)
284
- if prefix is None or identifier is None:
285
- continue
286
- yield prefix, identifier, data
287
-
288
-
289
- def _get_date(graph, ontology: str) -> Optional[datetime]:
758
+ for node, data in tqdm(
759
+ graph.nodes(data=True), disable=not use_tqdm, unit_scale=True, desc=f"[{ontology_prefix}]"
760
+ ):
761
+ name = data.get("name")
762
+ match _parse_str_or_curie_or_uri_helper(
763
+ node,
764
+ ontology_prefix=ontology_prefix,
765
+ name=name,
766
+ upgrade=upgrade,
767
+ context="stanza ID",
768
+ ):
769
+ case Reference() as reference:
770
+ yield reference, data
771
+ case NotCURIEError() as exc:
772
+ if _is_valid_identifier(node):
773
+ yield default_reference(ontology_prefix, node, name=name), data
774
+ elif strict:
775
+ raise exc
776
+ else:
777
+ logger.warning(str(exc))
778
+ case ParseError() as exc:
779
+ if strict:
780
+ raise exc
781
+ else:
782
+ logger.warning(str(exc))
783
+ # if blacklisted, just skip it with no warning
784
+
785
+
786
+ def _get_date(graph, ontology_prefix: str) -> datetime | None:
290
787
  try:
291
788
  rv = datetime.strptime(graph.graph["date"], DATE_FORMAT)
292
789
  except KeyError:
293
- logger.info("[%s] does not report a date", ontology)
790
+ logger.info("[%s] does not report a date", ontology_prefix)
294
791
  return None
295
792
  except ValueError:
296
- logger.info("[%s] reports a date that can't be parsed: %s", ontology, graph.graph["date"])
793
+ logger.info(
794
+ "[%s] reports a date that can't be parsed: %s", ontology_prefix, graph.graph["date"]
795
+ )
297
796
  return None
298
797
  else:
299
798
  return rv
300
799
 
301
800
 
302
- def _get_name(graph, ontology: str) -> str:
801
+ def _get_name(graph, ontology_prefix: str) -> str:
303
802
  try:
304
803
  rv = graph.graph["name"]
305
804
  except KeyError:
306
- logger.info("[%s] does not report a name", ontology)
307
- rv = ontology
805
+ logger.info("[%s] does not report a name", ontology_prefix)
806
+ rv = ontology_prefix
308
807
  return rv
309
808
 
310
809
 
311
810
  def iterate_graph_synonym_typedefs(
312
- graph: nx.MultiDiGraph, *, ontology: str, strict: bool = False
811
+ graph: nx.MultiDiGraph, *, ontology_prefix: str, strict: bool = False, upgrade: bool
313
812
  ) -> Iterable[SynonymTypeDef]:
314
813
  """Get synonym type definitions from an :mod:`obonet` graph."""
315
- for s in graph.graph.get("synonymtypedef", []):
316
- sid, name = s.split(" ", 1)
317
- name = name.strip().strip('"')
318
- if sid.startswith("http://") or sid.startswith("https://"):
319
- reference = Reference.from_iri(sid, name=name)
320
- elif ":" not in sid: # assume it's ad-hoc
321
- reference = Reference(prefix=ontology, identifier=sid, name=name)
322
- else: # assume it's a curie
323
- reference = Reference.from_curie(sid, name=name, strict=strict)
324
-
325
- if reference is None:
814
+ for line in graph.graph.get("synonymtypedef", []):
815
+ # TODO handle trailing comments
816
+ line, _, specificity = (x.strip() for x in line.rpartition('"'))
817
+ specificity = specificity.upper()
818
+ if not specificity:
819
+ specificity = None
820
+ elif specificity not in t.get_args(SynonymScope):
326
821
  if strict:
327
- raise ValueError(f"Could not parse {sid}")
328
- else:
329
- continue
822
+ raise ValueError(f"invalid synonym specificty: {specificity}")
823
+ logger.warning("[%s] invalid synonym specificty: %s", ontology_prefix, specificity)
824
+ specificity = None
330
825
 
331
- yield SynonymTypeDef(reference=reference)
826
+ curie, name = line.split(" ", 1)
827
+ # the name should be in quotes, so strip them out
828
+ name = name.strip().strip('"')
829
+ # TODO unquote the string?
830
+ reference = _obo_parse_identifier(
831
+ curie,
832
+ ontology_prefix=ontology_prefix,
833
+ name=name,
834
+ upgrade=upgrade,
835
+ strict=strict,
836
+ )
837
+ if reference is None:
838
+ logger.warning("[%s] unable to parse synonym typedef ID %s", ontology_prefix, curie)
839
+ continue
840
+ yield SynonymTypeDef(reference=reference, specificity=specificity)
332
841
 
333
842
 
334
- def iterate_graph_typedefs(
335
- graph: nx.MultiDiGraph, default_prefix: str, *, strict: bool = True
843
+ def iterate_typedefs(
844
+ graph: nx.MultiDiGraph,
845
+ *,
846
+ ontology_prefix: str,
847
+ strict: bool = False,
848
+ upgrade: bool,
849
+ macro_config: MacroConfig | None = None,
336
850
  ) -> Iterable[TypeDef]:
337
851
  """Get type definitions from an :mod:`obonet` graph."""
338
- for typedef in graph.graph.get("typedefs", []):
339
- if "id" in typedef:
340
- curie = typedef["id"]
341
- elif "identifier" in typedef:
342
- curie = typedef["identifier"]
852
+ if macro_config is None:
853
+ macro_config = MacroConfig(strict=strict, ontology_prefix=ontology_prefix)
854
+ # can't really have a pre-defined set of synonym typedefs here!
855
+ synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef] = {}
856
+ typedefs: Mapping[ReferenceTuple, TypeDef] = {}
857
+ missing_typedefs: set[ReferenceTuple] = set()
858
+ for data in graph.graph.get("typedefs", []):
859
+ if "id" in data:
860
+ typedef_id = data["id"]
861
+ elif "identifier" in data:
862
+ typedef_id = data["identifier"]
343
863
  else:
344
- raise KeyError
864
+ raise KeyError("typedef is missing an `id`")
345
865
 
346
- name = typedef.get("name")
866
+ name = data.get("name")
347
867
  if name is None:
348
- logger.debug("[%s] typedef %s is missing a name", graph.graph["ontology"], curie)
868
+ logger.debug("[%s] typedef %s is missing a name", ontology_prefix, typedef_id)
349
869
 
350
- if ":" in curie:
351
- reference = Reference.from_curie(curie, name=name, strict=strict)
352
- else:
353
- reference = Reference(prefix=graph.graph["ontology"], identifier=curie, name=name)
870
+ reference = _obo_parse_identifier(
871
+ typedef_id, strict=strict, ontology_prefix=ontology_prefix, name=name, upgrade=upgrade
872
+ )
354
873
  if reference is None:
355
- logger.warning("[%s] unable to parse typedef CURIE %s", graph.graph["ontology"], curie)
874
+ logger.warning("[%s] unable to parse typedef ID %s", ontology_prefix, typedef_id)
356
875
  continue
357
876
 
358
- xrefs = []
359
- for curie in typedef.get("xref", []):
360
- _xref = Reference.from_curie(curie, strict=strict)
361
- if _xref:
362
- xrefs.append(_xref)
363
- yield TypeDef(reference=reference, xrefs=xrefs)
877
+ typedef = TypeDef(
878
+ reference=reference,
879
+ namespace=data.get("namespace"),
880
+ is_metadata_tag=_get_boolean(data, "is_metadata_tag"),
881
+ is_class_level=_get_boolean(data, "is_class_level"),
882
+ builtin=_get_boolean(data, "builtin"),
883
+ is_obsolete=_get_boolean(data, "is_obsolete"),
884
+ is_anonymous=_get_boolean(data, "is_anonymous"),
885
+ is_anti_symmetric=_get_boolean(data, "is_anti_symmetric"),
886
+ is_symmetric=_get_boolean(data, "is_symmetric"),
887
+ is_reflexive=_get_boolean(data, "is_reflexive"),
888
+ is_cyclic=_get_boolean(data, "is_cyclic"),
889
+ is_transitive=_get_boolean(data, "is_transitive"),
890
+ is_functional=_get_boolean(data, "is_functional"),
891
+ is_inverse_functional=_get_boolean(data, "is_inverse_functional"),
892
+ domain=_get_reference(data, "domain", ontology_prefix=ontology_prefix, strict=strict),
893
+ range=_get_reference(data, "range", ontology_prefix=ontology_prefix, strict=strict),
894
+ inverse=_get_reference(
895
+ data, "inverse_of", ontology_prefix=ontology_prefix, strict=strict
896
+ ),
897
+ )
898
+ _process_alts(typedef, data, ontology_prefix=ontology_prefix, strict=strict)
899
+ _process_parents(typedef, data, ontology_prefix=ontology_prefix, strict=strict)
900
+ _process_synonyms(
901
+ typedef,
902
+ data,
903
+ ontology_prefix=ontology_prefix,
904
+ strict=strict,
905
+ upgrade=upgrade,
906
+ synonym_typedefs=synonym_typedefs,
907
+ )
908
+ _process_xrefs(
909
+ typedef,
910
+ data,
911
+ ontology_prefix=ontology_prefix,
912
+ strict=strict,
913
+ macro_config=macro_config,
914
+ upgrade=upgrade,
915
+ )
916
+ _process_properties(
917
+ typedef,
918
+ data,
919
+ ontology_prefix=ontology_prefix,
920
+ strict=strict,
921
+ upgrade=upgrade,
922
+ typedefs=typedefs,
923
+ )
924
+ _process_relations(
925
+ typedef,
926
+ data,
927
+ ontology_prefix=ontology_prefix,
928
+ strict=strict,
929
+ upgrade=upgrade,
930
+ typedefs=typedefs,
931
+ missing_typedefs=missing_typedefs,
932
+ )
933
+ _process_replaced_by(typedef, data, ontology_prefix=ontology_prefix, strict=strict)
934
+ _process_subsets(typedef, data, ontology_prefix=ontology_prefix, strict=strict)
935
+ _process_intersection_of(typedef, data, ontology_prefix=ontology_prefix, strict=strict)
936
+ _process_union_of(typedef, data, ontology_prefix=ontology_prefix, strict=strict)
937
+ _process_equivalent_to(typedef, data, ontology_prefix=ontology_prefix, strict=strict)
938
+ _process_disjoint_from(typedef, data, ontology_prefix=ontology_prefix, strict=strict)
939
+ _process_consider(typedef, data, ontology_prefix=ontology_prefix, strict=strict)
940
+ _process_comment(typedef, data, ontology_prefix=ontology_prefix, strict=strict)
941
+ _process_description(typedef, data, ontology_prefix=ontology_prefix, strict=strict)
942
+ _process_creation_date(typedef, data)
943
+
944
+ # the next 4 are typedef-specific
945
+ _process_equivalent_to_chain(typedef, data, ontology_prefix=ontology_prefix, strict=strict)
946
+ _process_holds_over_chain(typedef, data, ontology_prefix=ontology_prefix, strict=strict)
947
+ typedef.disjoint_over.extend(
948
+ iterate_node_reference_tag(
949
+ "disjoint_over",
950
+ data,
951
+ node=typedef.reference,
952
+ ontology_prefix=ontology_prefix,
953
+ strict=strict,
954
+ )
955
+ )
956
+ typedef.transitive_over.extend(
957
+ iterate_node_reference_tag(
958
+ "transitive_over",
959
+ data,
960
+ node=typedef.reference,
961
+ ontology_prefix=ontology_prefix,
962
+ strict=strict,
963
+ )
964
+ )
965
+
966
+ yield typedef
967
+
968
+
969
+ def _process_consider(stanza: Stanza, data, *, ontology_prefix: str, strict: bool = False):
970
+ for reference in iterate_node_reference_tag(
971
+ "consider",
972
+ data,
973
+ node=stanza.reference,
974
+ ontology_prefix=ontology_prefix,
975
+ strict=strict,
976
+ ):
977
+ stanza.append_see_also(reference)
978
+
979
+
980
+ def _process_equivalent_to_chain(
981
+ typedef: TypeDef, data, *, ontology_prefix: str, strict: bool = False
982
+ ) -> None:
983
+ for chain in _iterate_chain(
984
+ "equivalent_to_chain", typedef, data, ontology_prefix=ontology_prefix, strict=strict
985
+ ):
986
+ typedef.equivalent_to_chain.append(chain)
987
+
988
+
989
+ def _process_holds_over_chain(
990
+ typedef: TypeDef, data, *, ontology_prefix: str, strict: bool = False
991
+ ) -> None:
992
+ for chain in _iterate_chain(
993
+ "holds_over_chain", typedef, data, ontology_prefix=ontology_prefix, strict=strict
994
+ ):
995
+ typedef.holds_over_chain.append(chain)
996
+
997
+
998
+ def _iterate_chain(
999
+ tag: str, typedef: TypeDef, data, *, ontology_prefix: str, strict: bool = False
1000
+ ) -> Iterable[list[Reference]]:
1001
+ for chain in data.get(tag, []):
1002
+ # chain is a list of CURIEs
1003
+ predicate_chain = _process_chain_helper(typedef, chain, ontology_prefix=ontology_prefix)
1004
+ if predicate_chain is None:
1005
+ logger.warning(
1006
+ "[%s - %s] could not parse line: %s: %s",
1007
+ ontology_prefix,
1008
+ typedef.curie,
1009
+ tag,
1010
+ chain,
1011
+ )
1012
+ else:
1013
+ yield predicate_chain
1014
+
1015
+
1016
+ def _process_chain_helper(
1017
+ term: Stanza, chain: str, ontology_prefix: str, strict: bool = False
1018
+ ) -> list[Reference] | None:
1019
+ rv = []
1020
+ for curie in chain.split():
1021
+ curie = curie.strip()
1022
+ r = _obo_parse_identifier(
1023
+ curie, ontology_prefix=ontology_prefix, strict=strict, node=term.reference
1024
+ )
1025
+ if r is None:
1026
+ return None
1027
+ rv.append(r)
1028
+ return rv
364
1029
 
365
1030
 
 def get_definition(
-     data, *, prefix: str, identifier: str
- ) -> Union[tuple[None, None], tuple[str, list[Reference]]]:
+     data, *, node: Reference, ontology_prefix: str, strict: bool = False
+ ) -> tuple[None | str, list[Reference | OBOLiteral]]:
     """Extract the definition from the data."""
     definition = data.get("def")  # it's allowed not to have a definition
     if not definition:
-         return None, None
-     return _extract_definition(definition, prefix=prefix, identifier=identifier)
+         return None, []
+     return _extract_definition(
+         definition, node=node, strict=strict, ontology_prefix=ontology_prefix
+     )
 
 
 def _extract_definition(
     s: str,
     *,
-     prefix: str,
-     identifier: str,
+     node: Reference,
     strict: bool = False,
- ) -> Union[tuple[None, None], tuple[str, list[Reference]]]:
+     ontology_prefix: str,
+ ) -> tuple[None | str, list[Reference | OBOLiteral]]:
     """Extract the definitions."""
     if not s.startswith('"'):
-         raise ValueError("definition does not start with a quote")
+         logger.warning(f"[{node.curie}] definition does not start with a quote")
+         return None, []
 
     try:
         definition, rest = _quote_split(s)
-     except ValueError:
-         logger.warning("[%s:%s] could not parse definition: %s", prefix, identifier, s)
-         return None, None
+     except ValueError as e:
+         logger.warning("[%s] failed to parse definition quotes: %s", node.curie, str(e))
+         return None, []
 
-     if not rest.startswith("[") or not rest.endswith("]"):
-         logger.warning("[%s:%s] problem with definition: %s", prefix, identifier, s)
+     if not rest.startswith("["):
+         logger.debug("[%s] no square brackets for provenance on line: %s", node.curie, s)
         provenance = []
     else:
-         provenance = _parse_trailing_ref_list(rest, strict=strict)
-     return definition, provenance
+         rest = rest.lstrip("[").rstrip("]")  # FIXME this doesn't account for trailing annotations
+         provenance = _parse_provenance_list(
+             rest,
+             node=node,
+             ontology_prefix=ontology_prefix,
+             counter=DEFINITION_PROVENANCE_COUNTER,
+             scope_text="definition provenance",
+             line=s,
+             strict=strict,
+         )
+     return definition or None, provenance
 
 
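Concretely, a `def` line pairs one quoted string with an optional bracketed provenance list. A standalone sketch of that shape (hypothetical helper; unlike the real `_quote_split`, it ignores escaped quotes):

```python
# Hypothetical sketch, not pyobo's API: split an OBO `def` line into its
# quoted text and its optional [comma-separated] provenance references.
def split_def(line: str) -> tuple[str, list[str]]:
    text, _, rest = line.removeprefix('"').partition('"')
    rest = rest.strip()
    if rest.startswith("[") and rest.endswith("]"):
        refs = [r.strip() for r in rest[1:-1].split(",") if r.strip()]
    else:
        refs = []
    return text, refs

assert split_def('"A kind of disease." [PMID:123, doi:10.1000/xyz]') == (
    "A kind of disease.",
    ["PMID:123", "doi:10.1000/xyz"],
)
```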
- def _get_first_nonquoted(s: str) -> Optional[int]:
+ def get_first_nonescaped_quote(s: str) -> int | None:
+     """Get the position of the first non-escaped quote, or None if there is none."""
+     if not s:
+         return None
+     if s[0] == '"':
+         # special case for the first position, since pairwise() starts at index 1
+         return 0
     for i, (a, b) in enumerate(pairwise(s), start=1):
         if b == '"' and a != "\\":
             return i
@@ -406,10 +1089,12 @@ def _get_first_nonquoted(s: str) -> Optional[int]:
 
 
 def _quote_split(s: str) -> tuple[str, str]:
-     s = s.lstrip('"')
-     i = _get_first_nonquoted(s)
+     if not s.startswith('"'):
+         raise ValueError(f"'{s}' does not start with a quote")
+     s = s.removeprefix('"')
+     i = get_first_nonescaped_quote(s)
     if i is None:
-         raise ValueError
+         raise ValueError(f"no closing quote found in `{s}`")
     return _clean_definition(s[:i].strip()), s[i + 1 :].strip()
 
 
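The `pairwise` scan only treats a quote as closing when the preceding character is not a backslash, which is why index 0 needs the special case above. A few illustrative calls (values are made up):

```python
from itertools import pairwise

def first_nonescaped_quote(s: str) -> int | None:
    # mirrors the scan above: pairwise() yields (previous, current) pairs
    # from index 1 onward, so a leading quote must be special-cased
    if not s:
        return None
    if s[0] == '"':
        return 0
    for i, (a, b) in enumerate(pairwise(s), start=1):
        if b == '"' and a != "\\":
            return i
    return None

assert first_nonescaped_quote('abc" rest') == 3
assert first_nonescaped_quote('a\\"bc" rest') == 5  # the escaped quote is skipped
assert first_nonescaped_quote("no quotes here") is None
```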
@@ -421,78 +1106,64 @@ def _clean_definition(s: str) -> str:
 
 def _extract_synonym(
     s: str,
-     synonym_typedefs: Mapping[str, SynonymTypeDef],
+     synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef],
     *,
-     prefix: str,
-     identifier: str,
-     strict: bool = True,
- ) -> Optional[Synonym]:
+     node: Reference,
+     strict: bool = False,
+     ontology_prefix: str,
+     upgrade: bool,
+ ) -> Synonym | None:
     # TODO check if the synonym is written like a CURIE... it shouldn't but I've seen it happen
     try:
         name, rest = _quote_split(s)
     except ValueError:
-         logger.warning("[%s:%s] invalid synonym: %s", prefix, identifier, s)
+         logger.warning("[%s] invalid synonym: %s", node.curie, s)
         return None
 
-     specificity: Optional[SynonymSpecificity] = None
-     for _specificity in SynonymSpecificities:
-         if rest.startswith(_specificity):
-             specificity = _specificity
-             rest = rest[len(_specificity) :].strip()
-             break
-
-     stype: Optional[SynonymTypeDef] = None
-     for _stype in synonym_typedefs.values():
-         # Since there aren't a lot of carefully defined synonym definitions, it
-         # can appear as a string or curie. Therefore, we might see temporary prefixes
-         # get added, so we should check against full curies as well as local unique
-         # identifiers
-         if rest.startswith(_stype.curie):
-             rest = rest[len(_stype.curie) :].strip()
-             stype = _stype
-             break
-         elif rest.startswith(_stype.preferred_curie):
-             rest = rest[len(_stype.preferred_curie) :].strip()
-             stype = _stype
-             break
-         elif rest.startswith(_stype.identifier):
-             rest = rest[len(_stype.identifier) :].strip()
-             stype = _stype
-             break
-
-     if not rest.startswith("[") or not rest.endswith("]"):
-         logger.warning("[%s:%s] problem with synonym: %s", prefix, identifier, s)
-         return None
+     specificity, rest = _chomp_specificity(rest)
+     synonym_typedef, rest = _chomp_typedef(
+         rest,
+         synonym_typedefs=synonym_typedefs,
+         strict=strict,
+         node=node,
+         ontology_prefix=ontology_prefix,
+         upgrade=upgrade,
+     )
+     provenance, rest = _chomp_references(
+         rest,
+         strict=strict,
+         node=node,
+         ontology_prefix=ontology_prefix,
+         line=s,
+     )
+     annotations = _chomp_axioms(rest, node=node, strict=strict)
 
-     provenance = _parse_trailing_ref_list(rest, strict=strict)
     return Synonym(
         name=name,
-         specificity=specificity or "EXACT",
-         type=stype or DEFAULT_SYNONYM_TYPE,
-         provenance=provenance,
+         specificity=specificity,
+         type=synonym_typedef.reference if synonym_typedef else None,
+         provenance=list(provenance or []),
+         annotations=annotations,
     )
 
 
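After the quoted name, the remainder of a synonym line is consumed left to right: an optional specificity, an optional synonym type, the bracketed provenance, and finally any trailing axioms. A hypothetical walk-through of that chomp order on a representative line:

```python
# Hypothetical walk-through (not pyobo's API) of the chomp order used above.
line = '"LTEC I" EXACT [Orphanet:93938,DOI:xxxx]'
name, rest = line[1:].split('"', 1)                  # quoted name -> 'LTEC I'
specificity, _, rest = rest.strip().partition(" ")   # chomp specificity -> 'EXACT'
provenance = rest.strip("[]").split(",")             # chomp references
print(name, specificity, provenance)
# LTEC I EXACT ['Orphanet:93938', 'DOI:xxxx']
```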
- def _parse_trailing_ref_list(rest, *, strict: bool = True):
-     rest = rest.lstrip("[").rstrip("]")
-     return [
-         Reference.from_curie(curie.strip(), strict=strict)
-         for curie in rest.split(",")
-         if curie.strip()
-     ]
+ #: A counter for errors in parsing provenance
+ DEFINITION_PROVENANCE_COUNTER: Counter[tuple[str, str]] = Counter()
 
 
 def iterate_node_synonyms(
     data: Mapping[str, Any],
-     synonym_typedefs: Mapping[str, SynonymTypeDef],
+     synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef],
     *,
-     prefix: str,
-     identifier: str,
+     node: Reference,
     strict: bool = False,
+     ontology_prefix: str,
+     upgrade: bool,
 ) -> Iterable[Synonym]:
     """Extract synonyms from a :mod:`obonet` node's data.
 
-     Example strings:
+     Example strings
+
     - "LTEC I" EXACT [Orphanet:93938,DOI:xxxx]
     - "LTEC I" EXACT [Orphanet:93938]
     - "LTEC I" [Orphanet:93938]
@@ -500,121 +1171,409 @@ def iterate_node_synonyms(
     """
     for s in data.get("synonym", []):
         s = _extract_synonym(
-             s, synonym_typedefs, prefix=prefix, identifier=identifier, strict=strict
+             s,
+             synonym_typedefs,
+             node=node,
+             strict=strict,
+             ontology_prefix=ontology_prefix,
+             upgrade=upgrade,
         )
         if s is not None:
             yield s
 
 
- HANDLED_PROPERTY_TYPES = {
-     "xsd:string": str,
-     "xsd:dateTime": datetime,
- }
-
-
 def iterate_node_properties(
-     data: Mapping[str, Any], *, property_prefix: Optional[str] = None, term=None
- ) -> Iterable[tuple[str, str]]:
+     data: Mapping[str, Any],
+     *,
+     node: Reference,
+     strict: bool = False,
+     ontology_prefix: str,
+     upgrade: bool,
+     context: str,
+ ) -> Iterable[Annotation]:
     """Extract properties from a :mod:`obonet` node's data."""
     for prop_value_type in data.get("property_value", []):
-         try:
-             prop, value_type = prop_value_type.split(" ", 1)
-         except ValueError:
-             logger.info("malformed property: %s on %s", prop_value_type, term and term.curie)
-             continue
-         if property_prefix is not None and prop.startswith(property_prefix):
-             prop = prop[len(property_prefix) :]
+         if yv := _handle_prop(
+             prop_value_type,
+             node=node,
+             strict=strict,
+             ontology_prefix=ontology_prefix,
+             upgrade=upgrade,
+             context=context,
+         ):
+             yield yv
+
+
+ #: Keep track of property-value pairs for which the value couldn't be parsed,
+ #: such as `dc:conformsTo autoimmune:inflammation.yaml` in MONDO
+ UNHANDLED_PROP_OBJECTS: Counter[tuple[str, str]] = Counter()
+
+ UNHANDLED_PROPS: Counter[tuple[str, str]] = Counter()
+
+
+ def _handle_prop(
+     prop_value_type: str,
+     *,
+     node: Reference,
+     strict: bool = False,
+     ontology_prefix: str,
+     upgrade: bool,
+     context: str | None,
+ ) -> Annotation | None:
+     try:
+         prop, value_type = prop_value_type.split(" ", 1)
+     except ValueError:
+         logger.warning("[%s] property_value is missing a space: %s", node.curie, prop_value_type)
+         return None
+
+     prop_reference = _get_prop(
+         prop,
+         node=node,
+         strict=strict,
+         ontology_prefix=ontology_prefix,
+         upgrade=upgrade,
+         line=prop_value_type,
+         counter=UNHANDLED_PROPS,
+         context=context,
+     )
+     if prop_reference is None:
+         return None
+
+     value_type = value_type.strip()
+     datatype: Reference | None
+     if " " not in value_type:
+         value, datatype = value_type, None
+     else:
+         value, datatype_raw = (s.strip() for s in value_type.rsplit(" ", 1))
+         match _parse_str_or_curie_or_uri_helper(
+             datatype_raw,
+             ontology_prefix=ontology_prefix,
+             node=node,
+             predicate=prop_reference,
+             line=prop_value_type,
+             upgrade=upgrade,
+             context="property datatype",
+         ):
+             case Reference() as datatype_:
+                 datatype = datatype_
+             case BlocklistError():
+                 return None
+             case ParseError() as exc:
+                 if strict:
+                     raise exc
+                 else:
+                     logger.warning(str(exc))
+                 return None
+
+     # if it's an empty string, like the ones removed in
+     # https://github.com/oborel/obo-relations/pull/830, just quit
+     if value == '""':
+         return None
+
+     quoted = value.startswith('"') and value.endswith('"')
+     value = value.strip('"').strip()
 
+     # first, special case datetimes. Whether it's quoted or not,
+     # we always deal with this first
+     if datatype and datatype.curie == "xsd:dateTime":
         try:
-             value, _ = value_type.rsplit(" ", 1)  # second entry is the value type
+             obo_literal = OBOLiteral.datetime(value)
         except ValueError:
-             # logger.debug(f'property missing datatype. defaulting to string - {prop_value_type}')
-             value = value_type  # could assign type to be 'xsd:string' by default
-         value = value.strip('"')
-         yield prop, value
+             logger.warning(
+                 "[%s - %s] could not parse date: %s", node.curie, prop_reference.curie, value
+             )
+             return None
+         else:
+             return Annotation(prop_reference, obo_literal)
+
+     if datatype and datatype.curie == "xsd:anyURI":
+         match _parse_str_or_curie_or_uri_helper(
+             value,
+             node=node,
+             predicate=prop_reference,
+             ontology_prefix=ontology_prefix,
+             line=prop_value_type,
+             upgrade=upgrade,
+             context="property object",
+         ):
+             case Reference() as obj_reference:
+                 return Annotation(prop_reference, obj_reference)
+             case BlocklistError():
+                 return None
+             case UnparsableIRIError():
+                 return Annotation(prop_reference, OBOLiteral.uri(value))
+             case ParseError() as exc:
+                 if strict:
+                     raise exc
+                 else:
+                     logger.warning(str(exc))
+                 return None
+
+     # if it's quoted and there's a datatype, try parsing as a CURIE/URI anyway
+     # (this is a bit aggressive, but more useful than the spec)
+     if quoted:
+         # give parsing a try anyway, just in case ;)
+         match _parse_str_or_curie_or_uri_helper(
+             value,
+             ontology_prefix=ontology_prefix,
+             node=node,
+             line=prop_value_type,
+             upgrade=upgrade,
+             predicate=prop_reference,
+             context="property object",
+         ):
+             case Reference() as obj_reference:
+                 return Annotation(prop_reference, obj_reference)
+             case BlocklistError():
+                 return None
+             case ParseError():
+                 if datatype:
+                     return Annotation(prop_reference, OBOLiteral(value, datatype, None))
+                 else:
+                     return Annotation(prop_reference, OBOLiteral.string(value))
+     else:
+         if datatype:
+             logger.debug(
+                 "[%s] throwing away datatype since no quotes were used: %s", node.curie, value_type
+             )
+
+         # if it wasn't quoted and there was no datatype, go for parsing as an object
+         match _obo_parse_identifier(
+             value,
+             strict=strict,
+             ontology_prefix=ontology_prefix,
+             node=node,
+             predicate=prop_reference,
+             line=prop_value_type,
+             context="property object",
+             counter=UNHANDLED_PROP_OBJECTS,
+         ):
+             case Reference() as obj_reference:
+                 return Annotation(prop_reference, obj_reference)
+             case None:
+                 return None
+
+
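For reference, these are the `property_value` shapes the branches above distinguish; the lines are illustrative and not taken from any particular ontology:

```python
# Illustrative `property_value` shapes (hypothetical lines, not from the source):
lines = [
    "seeAlso GO:0005737",                                # unquoted object, parsed as a reference
    'dcterms:date "2024-01-01T00:00:00Z" xsd:dateTime',  # datetime literal, always handled first
    'IAO:0000117 "https://example.org/x" xsd:anyURI',    # URI object
    'dcterms:description "free text" xsd:string',        # quoted literal with a datatype
]
for line in lines:
    prop, _, value_type = line.partition(" ")  # same first split as _handle_prop
    print(prop, "->", value_type)
```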
+ def _get_prop(
+     property_id: str,
+     *,
+     node: Reference,
+     strict: bool,
+     ontology_prefix: str,
+     upgrade: bool,
+     line: str,
+     counter: Counter[tuple[str, str]] | None = None,
+     context: str | None = None,
+ ) -> Reference | None:
+     if rv := _parse_default_prop(property_id, ontology_prefix):
+         return rv
+     return _obo_parse_identifier(
+         property_id,
+         strict=strict,
+         node=node,
+         ontology_prefix=ontology_prefix,
+         upgrade=upgrade,
+         counter=counter,
+         context=context,
+         line=line,
+     )
+
+
+ def _parse_default_prop(property_id, ontology_prefix) -> Reference | None:
+     for delim in "#/":
+         sw = f"http://purl.obolibrary.org/obo/{ontology_prefix}{delim}"
+         if property_id.startswith(sw):
+             identifier = property_id.removeprefix(sw)
+             return default_reference(ontology_prefix, identifier)
+     return None
 
 
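`_parse_default_prop` recognizes ontology-local PURLs by stripping the OBO PURL base plus either delimiter; with hypothetical values, the effect looks like:

```python
# Hypothetical values showing the PURL prefix-stripping above: an ontology-local
# property URI is reduced to its local identifier in that ontology's namespace.
uri = "http://purl.obolibrary.org/obo/go#systematic_synonym"
for delim in "#/":
    sw = f"http://purl.obolibrary.org/obo/go{delim}"
    if uri.startswith(sw):
        print(uri.removeprefix(sw))  # -> systematic_synonym
```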
- def iterate_node_parents(
+ def iterate_node_reference_tag(
+     tag: str,
     data: Mapping[str, Any],
     *,
-     prefix: str,
-     identifier: str,
-     strict: bool = True,
+     node: Reference,
+     strict: bool = False,
+     ontology_prefix: str,
+     upgrade: bool = True,
+     counter: Counter[tuple[str, str]] | None = None,
 ) -> Iterable[Reference]:
-     """Extract parents from a :mod:`obonet` node's data."""
-     for parent_curie in data.get("is_a", []):
-         reference = Reference.from_curie(parent_curie, strict=strict)
+     """Extract references from the data for the given tag."""
+     for identifier in data.get(tag, []):
+         reference = _obo_parse_identifier(
+             identifier,
+             strict=strict,
+             node=node,
+             ontology_prefix=ontology_prefix,
+             upgrade=upgrade,
+             counter=counter,
+         )
         if reference is None:
             logger.warning(
-                 "[%s:%s] could not parse parent curie: %s", prefix, identifier, parent_curie
+                 "[%s] %s - could not parse identifier: %s", ontology_prefix, tag, identifier
             )
-             continue
-         yield reference
+         else:
+             yield reference
 
 
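With the tag name as a parameter, the per-tag iterators (the removed `iterate_node_parents` above and `iterate_node_alt_ids` just below) collapse into this single helper. A sketch of the obonet-style node data it consumes (values are made up):

```python
# Hypothetical obonet-style node data consumed above (values are made up):
data = {
    "is_a": ["GO:0005737", "GO:0005575"],
    "alt_id": ["GO:0000000"],
}
# e.g., iterate_node_reference_tag("is_a", data, ...) would parse and yield
# a Reference for each entry under the "is_a" key.
for tag in ("is_a", "alt_id"):
    for curie in data.get(tag, []):
        print(tag, curie)
```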
- def iterate_node_alt_ids(data: Mapping[str, Any], *, strict: bool = True) -> Iterable[Reference]:
-     """Extract alternate identifiers from a :mod:`obonet` node's data."""
-     for curie in data.get("alt_id", []):
-         reference = Reference.from_curie(curie, strict=strict)
-         if reference is not None:
-             yield reference
+ def _process_intersection_of(
+     term: Stanza,
+     data: Mapping[str, Any],
+     *,
+     strict: bool = False,
+     ontology_prefix: str,
+     upgrade: bool = True,
+ ) -> None:
+     """Process the intersection_of tags in the data."""
+     for line in data.get("intersection_of", []):
+         predicate_id, _, target_id = line.partition(" ")
+         predicate = _obo_parse_identifier(
+             predicate_id,
+             strict=strict,
+             node=term.reference,
+             ontology_prefix=ontology_prefix,
+             upgrade=upgrade,
+         )
+         if predicate is None:
+             logger.warning("[%s] could not parse intersection_of: %s", ontology_prefix, line)
+             continue
+
+         if target_id:
+             # this means that there's a second part, so let's try parsing it
+             target = _obo_parse_identifier(
+                 target_id,
+                 strict=strict,
+                 node=term.reference,
+                 predicate=predicate,
+                 ontology_prefix=ontology_prefix,
+                 upgrade=upgrade,
+             )
+             if target is None:
+                 logger.warning(
+                     "[%s] could not parse intersection_of target: %s", ontology_prefix, line
+                 )
+                 continue
+             term.append_intersection_of(predicate, target)
+         else:
+             term.append_intersection_of(predicate)
 
 
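An `intersection_of` line either names a class alone or pairs a relation with a filler, which is exactly what the `str.partition` above distinguishes; with illustrative values:

```python
# Illustrative `intersection_of` shapes (hypothetical values): a bare class,
# or a relation plus a filler, split by the same str.partition(" ") as above.
for line in ["GO:0005575", "part_of GO:0005737"]:
    predicate_id, _, target_id = line.partition(" ")
    print((predicate_id, target_id or None))
# -> ('GO:0005575', None)
# -> ('part_of', 'GO:0005737')
```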
 def iterate_node_relationships(
     data: Mapping[str, Any],
     *,
-     prefix: str,
-     identifier: str,
-     strict: bool = True,
+     node: Reference,
+     strict: bool = False,
+     ontology_prefix: str,
+     upgrade: bool,
 ) -> Iterable[tuple[Reference, Reference]]:
     """Extract relationships from a :mod:`obonet` node's data."""
-     for s in data.get("relationship", []):
-         relation_curie, target_curie = s.split(" ")
-         relation_prefix: Optional[str]
-         relation_identifier: Optional[str]
-         if relation_curie in RELATION_REMAPPINGS:
-             relation_prefix, relation_identifier = RELATION_REMAPPINGS[relation_curie]
-         else:
-             relation_prefix, relation_identifier = normalize_curie(relation_curie, strict=strict)
-         if relation_prefix is not None and relation_identifier is not None:
-             relation = Reference(prefix=relation_prefix, identifier=relation_identifier)
-         elif prefix is not None:
-             relation = Reference(prefix=prefix, identifier=relation_curie)
-         else:
-             logger.debug("unhandled relation: %s", relation_curie)
-             relation = Reference(prefix="obo", identifier=relation_curie)
-
-         # TODO replace with omni-parser from :mod:`curies`
-         target = Reference.from_curie(target_curie, strict=strict)
-         if target is None:
-             logger.warning(
-                 "[%s:%s] %s could not parse target %s", prefix, identifier, relation, target_curie
-             )
-             continue
+     for line in data.get("relationship", []):
+         relation_curie, target_curie = line.split(" ")
+
+         predicate = _obo_parse_identifier(
+             relation_curie,
+             strict=strict,
+             ontology_prefix=ontology_prefix,
+             node=node,
+             upgrade=upgrade,
+             line=line,
+             context="relationship predicate",
+         )
+         match predicate:
+             # TODO extend with other exception handling
+             case None:
+                 logger.warning("[%s] could not parse relation %s", node.curie, relation_curie)
+                 continue
 
-         yield relation, target
+         match _parse_str_or_curie_or_uri_helper(
+             target_curie,
+             ontology_prefix=ontology_prefix,
+             node=node,
+             predicate=predicate,
+             line=line,
+             context="relationship target",
+             upgrade=upgrade,
+         ):
+             case Reference() as target:
+                 yield predicate, target
+             case ParseError() as exc:
+                 if strict:
+                     raise exc
+                 else:
+                     logger.warning(str(exc))
 
 
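Note that `line.split(" ")` expects exactly two tokens, so a relationship line with trailing content would raise an unpacking `ValueError` in this code as written. An illustrative line (values are made up):

```python
# Illustrative `relationship` line (hypothetical values): exactly two tokens
# are expected by the unpacking above.
relation_curie, target_curie = "part_of GO:0005737".split(" ")
print(relation_curie, target_curie)  # part_of GO:0005737
```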
 def iterate_node_xrefs(
-     *, prefix: str, data: Mapping[str, Any], strict: bool = True
- ) -> Iterable[Reference]:
+     *,
+     data: Mapping[str, Any],
+     strict: bool = False,
+     ontology_prefix: str,
+     node: Reference,
+     upgrade: bool,
+ ) -> Iterable[tuple[Reference, list[Reference | OBOLiteral]]]:
     """Extract xrefs from a :mod:`obonet` node's data."""
-     for xref in data.get("xref", []):
-         xref = xref.strip()
-
-         if curie_has_blacklisted_prefix(xref) or curie_is_blacklisted(xref) or ":" not in xref:
-             continue  # sometimes xref to self... weird
+     for line in data.get("xref", []):
+         line = line.strip()
+         if pair := _parse_xref_line(
+             line,
+             strict=strict,
+             node=node,
+             ontology_prefix=ontology_prefix,
+             upgrade=upgrade,
+         ):
+             yield pair
+
+
+ def _parse_xref_line(
+     line: str, *, strict: bool = False, ontology_prefix: str, node: Reference, upgrade: bool
+ ) -> tuple[Reference, list[Reference | OBOLiteral]] | None:
+     xref, _, rest = line.partition(" [")
+
+     rules = get_rules()
+
+     if rules.str_is_blocked(xref, context=ontology_prefix) or ":" not in xref:
+         return None  # sometimes xref to self... weird
+
+     xref = rules.remap_prefix(xref, context=ontology_prefix)
+
+     split_space = " " in xref
+     if split_space:
+         _xref_split = xref.split(" ", 1)
+         if _xref_split[1][0] not in {'"', "("}:
+             logger.debug("[%s] Problem with space in xref %s", node.curie, xref)
+             return None
+         xref = _xref_split[0]
+
+     xref_ref = _parse_str_or_curie_or_uri_helper(
+         xref, ontology_prefix=ontology_prefix, node=node, line=line, context="xref", upgrade=upgrade
+     )
+     match xref_ref:
+         case BlocklistError():
+             return None
+         case ParseError() as exc:
+             if strict:
+                 raise exc
+             else:
+                 if not XREF_PROVENANCE_COUNTER[ontology_prefix, xref]:
+                     logger.warning(str(exc))
+                 XREF_PROVENANCE_COUNTER[ontology_prefix, xref] += 1
+                 return None
+
+     if rest:
+         rest_front, _, _rest_rest = rest.partition("]")
+         provenance = _parse_provenance_list(
+             rest_front,
+             node=node,
+             ontology_prefix=ontology_prefix,
+             counter=XREF_PROVENANCE_COUNTER,
+             scope_text="xref provenance",
+             line=line,
+             strict=strict,
+         )
+     else:
+         provenance = []
 
-         xref = remap_prefix(xref)
+     return xref_ref, provenance
 
-         split_space = " " in xref
-         if split_space:
-             _xref_split = xref.split(" ", 1)
-             if _xref_split[1][0] not in {'"', "("}:
-                 logger.debug("[%s] Problem with space in xref %s", prefix, xref)
-                 continue
-             xref = _xref_split[0]
 
-         yv = Reference.from_curie(xref, strict=strict)
-         if yv is not None:
-             yield yv
+ XREF_PROVENANCE_COUNTER: Counter[tuple[str, str]] = Counter()
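Since an xref line optionally carries a bracketed provenance list after the identifier, the parse starts by partitioning on `" ["`. Illustrative lines (values are made up):

```python
# Illustrative xref shapes (hypothetical values) showing the " [" partition above:
for line in ["ICD10:G20", "ICD10:G20 [PMID:123, PMID:456]"]:
    xref, _, rest = line.partition(" [")
    refs = rest.partition("]")[0].split(",") if rest else []
    print(xref, [r.strip() for r in refs])
# -> ICD10:G20 []
# -> ICD10:G20 ['PMID:123', 'PMID:456']
```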