pyobo 0.11.2__py3-none-any.whl → 0.12.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/.DS_Store +0 -0
- pyobo/__init__.py +95 -20
- pyobo/__main__.py +0 -0
- pyobo/api/__init__.py +81 -10
- pyobo/api/alts.py +52 -42
- pyobo/api/combine.py +39 -0
- pyobo/api/edges.py +68 -0
- pyobo/api/hierarchy.py +231 -203
- pyobo/api/metadata.py +14 -19
- pyobo/api/names.py +207 -127
- pyobo/api/properties.py +117 -117
- pyobo/api/relations.py +68 -94
- pyobo/api/species.py +24 -21
- pyobo/api/typedefs.py +11 -11
- pyobo/api/utils.py +66 -13
- pyobo/api/xrefs.py +107 -114
- pyobo/cli/__init__.py +0 -0
- pyobo/cli/cli.py +35 -50
- pyobo/cli/database.py +210 -160
- pyobo/cli/database_utils.py +155 -0
- pyobo/cli/lookup.py +163 -195
- pyobo/cli/utils.py +19 -6
- pyobo/constants.py +102 -3
- pyobo/getters.py +209 -191
- pyobo/gilda_utils.py +52 -250
- pyobo/identifier_utils/__init__.py +33 -0
- pyobo/identifier_utils/api.py +305 -0
- pyobo/identifier_utils/preprocessing.json +873 -0
- pyobo/identifier_utils/preprocessing.py +27 -0
- pyobo/identifier_utils/relations/__init__.py +8 -0
- pyobo/identifier_utils/relations/api.py +162 -0
- pyobo/identifier_utils/relations/data.json +5824 -0
- pyobo/identifier_utils/relations/data_owl.json +57 -0
- pyobo/identifier_utils/relations/data_rdf.json +1 -0
- pyobo/identifier_utils/relations/data_rdfs.json +7 -0
- pyobo/mocks.py +9 -6
- pyobo/ner/__init__.py +9 -0
- pyobo/ner/api.py +72 -0
- pyobo/ner/normalizer.py +33 -0
- pyobo/obographs.py +48 -40
- pyobo/plugins.py +5 -4
- pyobo/py.typed +0 -0
- pyobo/reader.py +1354 -395
- pyobo/reader_utils.py +155 -0
- pyobo/resource_utils.py +42 -22
- pyobo/resources/__init__.py +0 -0
- pyobo/resources/goc.py +75 -0
- pyobo/resources/goc.tsv +188 -0
- pyobo/resources/ncbitaxon.py +4 -5
- pyobo/resources/ncbitaxon.tsv.gz +0 -0
- pyobo/resources/ro.py +3 -2
- pyobo/resources/ro.tsv +0 -0
- pyobo/resources/so.py +0 -0
- pyobo/resources/so.tsv +0 -0
- pyobo/sources/README.md +12 -8
- pyobo/sources/__init__.py +52 -29
- pyobo/sources/agrovoc.py +0 -0
- pyobo/sources/antibodyregistry.py +11 -12
- pyobo/sources/bigg/__init__.py +13 -0
- pyobo/sources/bigg/bigg_compartment.py +81 -0
- pyobo/sources/bigg/bigg_metabolite.py +229 -0
- pyobo/sources/bigg/bigg_model.py +46 -0
- pyobo/sources/bigg/bigg_reaction.py +77 -0
- pyobo/sources/biogrid.py +1 -2
- pyobo/sources/ccle.py +7 -12
- pyobo/sources/cgnc.py +9 -6
- pyobo/sources/chebi.py +1 -1
- pyobo/sources/chembl/__init__.py +9 -0
- pyobo/sources/{chembl.py → chembl/chembl_compound.py} +13 -25
- pyobo/sources/chembl/chembl_target.py +160 -0
- pyobo/sources/civic_gene.py +55 -15
- pyobo/sources/clinicaltrials.py +160 -0
- pyobo/sources/complexportal.py +24 -24
- pyobo/sources/conso.py +14 -22
- pyobo/sources/cpt.py +0 -0
- pyobo/sources/credit.py +1 -9
- pyobo/sources/cvx.py +27 -5
- pyobo/sources/depmap.py +9 -12
- pyobo/sources/dictybase_gene.py +2 -7
- pyobo/sources/drugbank/__init__.py +9 -0
- pyobo/sources/{drugbank.py → drugbank/drugbank.py} +11 -16
- pyobo/sources/{drugbank_salt.py → drugbank/drugbank_salt.py} +3 -8
- pyobo/sources/drugcentral.py +17 -13
- pyobo/sources/expasy.py +31 -34
- pyobo/sources/famplex.py +13 -18
- pyobo/sources/flybase.py +8 -13
- pyobo/sources/gard.py +62 -0
- pyobo/sources/geonames/__init__.py +9 -0
- pyobo/sources/geonames/features.py +28 -0
- pyobo/sources/{geonames.py → geonames/geonames.py} +87 -26
- pyobo/sources/geonames/utils.py +115 -0
- pyobo/sources/gmt_utils.py +6 -7
- pyobo/sources/go.py +20 -13
- pyobo/sources/gtdb.py +154 -0
- pyobo/sources/gwascentral/__init__.py +9 -0
- pyobo/sources/{gwascentral_phenotype.py → gwascentral/gwascentral_phenotype.py} +5 -7
- pyobo/sources/{gwascentral_study.py → gwascentral/gwascentral_study.py} +1 -7
- pyobo/sources/hgnc/__init__.py +9 -0
- pyobo/sources/{hgnc.py → hgnc/hgnc.py} +56 -70
- pyobo/sources/{hgncgenefamily.py → hgnc/hgncgenefamily.py} +8 -18
- pyobo/sources/icd/__init__.py +9 -0
- pyobo/sources/{icd10.py → icd/icd10.py} +35 -37
- pyobo/sources/icd/icd11.py +148 -0
- pyobo/sources/{icd_utils.py → icd/icd_utils.py} +66 -20
- pyobo/sources/interpro.py +4 -9
- pyobo/sources/itis.py +0 -5
- pyobo/sources/kegg/__init__.py +0 -0
- pyobo/sources/kegg/api.py +16 -38
- pyobo/sources/kegg/genes.py +9 -20
- pyobo/sources/kegg/genome.py +1 -7
- pyobo/sources/kegg/pathway.py +9 -21
- pyobo/sources/mesh.py +58 -24
- pyobo/sources/mgi.py +3 -10
- pyobo/sources/mirbase/__init__.py +11 -0
- pyobo/sources/{mirbase.py → mirbase/mirbase.py} +8 -11
- pyobo/sources/{mirbase_constants.py → mirbase/mirbase_constants.py} +0 -0
- pyobo/sources/{mirbase_family.py → mirbase/mirbase_family.py} +4 -8
- pyobo/sources/{mirbase_mature.py → mirbase/mirbase_mature.py} +3 -7
- pyobo/sources/msigdb.py +74 -39
- pyobo/sources/ncbi/__init__.py +9 -0
- pyobo/sources/ncbi/ncbi_gc.py +162 -0
- pyobo/sources/{ncbigene.py → ncbi/ncbigene.py} +18 -19
- pyobo/sources/nih_reporter.py +60 -0
- pyobo/sources/nlm/__init__.py +9 -0
- pyobo/sources/nlm/nlm_catalog.py +48 -0
- pyobo/sources/nlm/nlm_publisher.py +36 -0
- pyobo/sources/nlm/utils.py +116 -0
- pyobo/sources/npass.py +6 -8
- pyobo/sources/omim_ps.py +11 -4
- pyobo/sources/pathbank.py +4 -8
- pyobo/sources/pfam/__init__.py +9 -0
- pyobo/sources/{pfam.py → pfam/pfam.py} +3 -8
- pyobo/sources/{pfam_clan.py → pfam/pfam_clan.py} +2 -7
- pyobo/sources/pharmgkb/__init__.py +15 -0
- pyobo/sources/pharmgkb/pharmgkb_chemical.py +89 -0
- pyobo/sources/pharmgkb/pharmgkb_disease.py +77 -0
- pyobo/sources/pharmgkb/pharmgkb_gene.py +108 -0
- pyobo/sources/pharmgkb/pharmgkb_pathway.py +63 -0
- pyobo/sources/pharmgkb/pharmgkb_variant.py +84 -0
- pyobo/sources/pharmgkb/utils.py +86 -0
- pyobo/sources/pid.py +1 -6
- pyobo/sources/pombase.py +6 -10
- pyobo/sources/pubchem.py +4 -9
- pyobo/sources/reactome.py +5 -11
- pyobo/sources/rgd.py +11 -16
- pyobo/sources/rhea.py +37 -36
- pyobo/sources/ror.py +69 -42
- pyobo/sources/selventa/__init__.py +0 -0
- pyobo/sources/selventa/schem.py +4 -7
- pyobo/sources/selventa/scomp.py +1 -6
- pyobo/sources/selventa/sdis.py +4 -7
- pyobo/sources/selventa/sfam.py +1 -6
- pyobo/sources/sgd.py +6 -11
- pyobo/sources/signor/__init__.py +7 -0
- pyobo/sources/signor/download.py +41 -0
- pyobo/sources/signor/signor_complexes.py +105 -0
- pyobo/sources/slm.py +12 -15
- pyobo/sources/umls/__init__.py +7 -1
- pyobo/sources/umls/__main__.py +0 -0
- pyobo/sources/umls/get_synonym_types.py +20 -4
- pyobo/sources/umls/sty.py +57 -0
- pyobo/sources/umls/synonym_types.tsv +1 -1
- pyobo/sources/umls/umls.py +18 -22
- pyobo/sources/unimod.py +46 -0
- pyobo/sources/uniprot/__init__.py +1 -1
- pyobo/sources/uniprot/uniprot.py +40 -32
- pyobo/sources/uniprot/uniprot_ptm.py +4 -34
- pyobo/sources/utils.py +3 -2
- pyobo/sources/wikipathways.py +7 -10
- pyobo/sources/zfin.py +5 -10
- pyobo/ssg/__init__.py +12 -16
- pyobo/ssg/base.html +0 -0
- pyobo/ssg/index.html +26 -13
- pyobo/ssg/term.html +12 -2
- pyobo/ssg/typedef.html +0 -0
- pyobo/struct/__init__.py +54 -8
- pyobo/struct/functional/__init__.py +1 -0
- pyobo/struct/functional/dsl.py +2572 -0
- pyobo/struct/functional/macros.py +423 -0
- pyobo/struct/functional/obo_to_functional.py +385 -0
- pyobo/struct/functional/ontology.py +272 -0
- pyobo/struct/functional/utils.py +112 -0
- pyobo/struct/reference.py +331 -136
- pyobo/struct/struct.py +1484 -657
- pyobo/struct/struct_utils.py +1078 -0
- pyobo/struct/typedef.py +162 -210
- pyobo/struct/utils.py +12 -5
- pyobo/struct/vocabulary.py +138 -0
- pyobo/utils/__init__.py +0 -0
- pyobo/utils/cache.py +16 -15
- pyobo/utils/io.py +51 -41
- pyobo/utils/iter.py +5 -5
- pyobo/utils/misc.py +41 -53
- pyobo/utils/ndex_utils.py +0 -0
- pyobo/utils/path.py +73 -70
- pyobo/version.py +3 -3
- pyobo-0.12.1.dist-info/METADATA +671 -0
- pyobo-0.12.1.dist-info/RECORD +201 -0
- pyobo-0.12.1.dist-info/WHEEL +4 -0
- {pyobo-0.11.2.dist-info → pyobo-0.12.1.dist-info}/entry_points.txt +1 -0
- pyobo-0.12.1.dist-info/licenses/LICENSE +21 -0
- pyobo/aws.py +0 -162
- pyobo/cli/aws.py +0 -47
- pyobo/identifier_utils.py +0 -142
- pyobo/normalizer.py +0 -232
- pyobo/registries/__init__.py +0 -16
- pyobo/registries/metaregistry.json +0 -507
- pyobo/registries/metaregistry.py +0 -135
- pyobo/sources/icd11.py +0 -105
- pyobo/xrefdb/__init__.py +0 -1
- pyobo/xrefdb/canonicalizer.py +0 -214
- pyobo/xrefdb/priority.py +0 -59
- pyobo/xrefdb/sources/__init__.py +0 -60
- pyobo/xrefdb/sources/biomappings.py +0 -36
- pyobo/xrefdb/sources/cbms2019.py +0 -91
- pyobo/xrefdb/sources/chembl.py +0 -83
- pyobo/xrefdb/sources/compath.py +0 -82
- pyobo/xrefdb/sources/famplex.py +0 -64
- pyobo/xrefdb/sources/gilda.py +0 -50
- pyobo/xrefdb/sources/intact.py +0 -113
- pyobo/xrefdb/sources/ncit.py +0 -133
- pyobo/xrefdb/sources/pubchem.py +0 -27
- pyobo/xrefdb/sources/wikidata.py +0 -116
- pyobo/xrefdb/xrefs_pipeline.py +0 -180
- pyobo-0.11.2.dist-info/METADATA +0 -711
- pyobo-0.11.2.dist-info/RECORD +0 -157
- pyobo-0.11.2.dist-info/WHEEL +0 -5
- pyobo-0.11.2.dist-info/top_level.txt +0 -1
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
"""Utilities for functional OWL."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
from collections.abc import Iterable
|
|
7
|
+
|
|
8
|
+
import curies
|
|
9
|
+
import rdflib
|
|
10
|
+
from curies import Converter, Reference
|
|
11
|
+
from rdflib import OWL, RDF, Graph, term
|
|
12
|
+
|
|
13
|
+
from pyobo.constants import DEFAULT_PREFIX_MAP
|
|
14
|
+
|
|
15
|
+
__all__ = [
|
|
16
|
+
"EXAMPLE_PREFIX_MAP",
|
|
17
|
+
"FunctionalOWLSerializable",
|
|
18
|
+
"RDFNodeSerializable",
|
|
19
|
+
"get_rdf_graph",
|
|
20
|
+
]
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# Prefix map for examples: every entry of ``DEFAULT_PREFIX_MAP`` plus the
# additional prefixes listed below. Each value is a URI prefix that is joined
# directly with a local identifier, so it has to end in its own delimiter
# (``/``, ``#``, ``_``, or ``:``).
EXAMPLE_PREFIX_MAP = {
    **DEFAULT_PREFIX_MAP,
    "oboInOwl": "http://www.geneontology.org/formats/oboInOwl#",
    "dcterms": "http://purl.org/dc/terms/",
    "obo": "http://purl.obolibrary.org/obo/",
    "OMO": "http://purl.obolibrary.org/obo/OMO_",
    "sssom": "https://w3id.org/sssom/",
    "semapv": "https://w3id.org/semapv/vocab/",
    "skos": "http://www.w3.org/2004/02/skos/core#",
    #
    "a": "https://example.org/a:",
    # The trailing slash is required: without it, expanding an ORCID CURIE
    # would glue the identifier onto the host name (https://orcid.org0000-...)
    "orcid": "https://orcid.org/",
    "ZFA": "http://purl.obolibrary.org/obo/ZFA_",
    "CL": "http://purl.obolibrary.org/obo/CL_",
    "BFO": "http://purl.obolibrary.org/obo/BFO_",
    "NCBITaxon": "http://purl.obolibrary.org/obo/NCBITaxon_",
}
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class FunctionalOWLSerializable(ABC):
    """Base class for objects that render themselves as functional OWL.

    Subclasses only implement :meth:`to_funowl_args`; the surrounding
    ``ClassName(...)`` wrapper is produced by :meth:`to_funowl`.
    """

    def to_funowl(self) -> str:
        """Render this object as ``<class name>(<arguments>)``."""
        # The functional OWL tag is taken from the name of the concrete class
        return "{}({})".format(self.__class__.__name__, self.to_funowl_args())

    @abstractmethod
    def to_funowl_args(self) -> str:
        """Return the text that goes between the parentheses of the tag."""
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class RDFNodeSerializable(ABC):
    """Base class for objects that can add themselves to an RDF graph as a node."""

    @abstractmethod
    def to_rdflib_node(self, graph: Graph, converter: Converter):
        """Serialize this object into ``graph``, using ``converter`` for CURIE/URI handling."""

    def to_ttl(self, prefix_map: dict[str, str], *, output_prefixes: bool = False) -> str:
        """Serialize only this object as Turtle.

        :param prefix_map: Mapping from prefix to URI prefix used for serialization
        :param output_prefixes: Passed through to ``serialize_turtle``; controls whether
            the ``@prefix`` lines are kept in the output
        :returns: The Turtle text produced by ``serialize_turtle``
        """
        axioms = [self]
        return serialize_turtle(axioms, prefix_map=prefix_map, output_prefixes=output_prefixes)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
# IRI that ``get_rdf_graph`` declares as the ``owl:Ontology`` node of the graphs it builds.
EXAMPLE_ONTOLOGY_IRI = "https://example.org/example.ofn"
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def get_rdf_graph(
    axioms: Iterable[RDFNodeSerializable], prefix_map: dict[str, str]
) -> rdflib.Graph:
    """Build an RDF graph that contains the given axioms.

    :param axioms: Objects that know how to add themselves to a graph
    :param prefix_map: Mapping from prefix to URI prefix, combined with the
        namespaces that a fresh graph already knows
    :returns: A graph holding an ``owl:Ontology`` declaration for
        ``EXAMPLE_ONTOLOGY_IRI`` plus whatever each axiom added
    """
    graph = Graph()
    ontology_node = term.URIRef(EXAMPLE_ONTOLOGY_IRI)
    graph.add((ontology_node, RDF.type, OWL.Ontology))

    # Chain the graph's own bindings with the given prefix map so callers
    # don't have to worry about default namespaces like owl
    graph_converter = Converter.from_rdflib(graph)
    custom_converter = Converter.from_prefix_map(prefix_map)
    converter = curies.chain([graph_converter, custom_converter])

    # Register every prefix of the combined converter on the graph
    for prefix, uri_prefix in converter.bimap.items():
        graph.namespace_manager.bind(prefix, uri_prefix)

    for axiom in axioms:
        axiom.to_rdflib_node(graph, converter)

    return graph
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def serialize_turtle(
    axioms: Iterable[RDFNodeSerializable],
    *,
    output_prefixes: bool = False,
    prefix_map: dict[str, str],
) -> str:
    """Serialize axioms as Turtle text.

    :param axioms: Objects to add to the graph before serializing
    :param output_prefixes: If true, keep the ``@prefix`` declarations; otherwise
        every line starting with ``@prefix`` is dropped
    :param prefix_map: Mapping from prefix to URI prefix, forwarded to ``get_rdf_graph``
    :returns: The serialized graph with surrounding whitespace stripped
    """
    turtle = get_rdf_graph(axioms, prefix_map=prefix_map).serialize()
    if not output_prefixes:
        body_lines = [line for line in turtle.splitlines() if not line.startswith("@prefix")]
        turtle = "\n".join(body_lines)
    return turtle.strip()
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def list_to_funowl(
    elements: Iterable[FunctionalOWLSerializable | Reference], *, sep: str = " "
) -> str:
    """Serialize objects as functional OWL, joined by a space or another given separator.

    :param elements: Functional OWL objects and/or references
    :param sep: The text placed between consecutive elements
    :returns: The joined text; serializable elements contribute their
        ``to_funowl()`` output and all other elements contribute their ``curie``
    """
    rendered = []
    for element in elements:
        if isinstance(element, FunctionalOWLSerializable):
            rendered.append(element.to_funowl())
        else:
            # Anything else is treated as a reference and written as its CURIE
            rendered.append(element.curie)
    return sep.join(rendered)
|
pyobo/struct/reference.py
CHANGED
|
@@ -1,140 +1,76 @@
|
|
|
1
1
|
"""Data structures for OBO."""
|
|
2
2
|
|
|
3
|
-
from
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import datetime
|
|
6
|
+
import logging
|
|
7
|
+
from collections import Counter
|
|
8
|
+
from collections.abc import Iterable, Sequence
|
|
9
|
+
from typing import Any, NamedTuple
|
|
4
10
|
|
|
5
11
|
import bioregistry
|
|
6
12
|
import curies
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
from
|
|
11
|
-
from
|
|
12
|
-
|
|
13
|
+
import dateutil.parser
|
|
14
|
+
import pytz
|
|
15
|
+
from bioregistry import NormalizedNamableReference as Reference
|
|
16
|
+
from curies import ReferenceTuple
|
|
17
|
+
from curies.preprocessing import BlocklistError
|
|
18
|
+
|
|
19
|
+
from ..identifier_utils import (
|
|
20
|
+
NotCURIEError,
|
|
21
|
+
ParseError,
|
|
22
|
+
UnparsableIRIError,
|
|
23
|
+
_is_valid_identifier,
|
|
24
|
+
_parse_str_or_curie_or_uri_helper,
|
|
25
|
+
)
|
|
13
26
|
|
|
14
27
|
__all__ = [
|
|
15
|
-
"Reference",
|
|
16
28
|
"Referenced",
|
|
29
|
+
"default_reference",
|
|
30
|
+
"get_preferred_curie",
|
|
31
|
+
"multi_reference_escape",
|
|
32
|
+
"reference_escape",
|
|
33
|
+
"unspecified_matching",
|
|
17
34
|
]
|
|
18
35
|
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
resource = bioregistry.get_resource(prefix)
|
|
50
|
-
if resource is None:
|
|
51
|
-
raise ExpansionError(f"Unknown prefix: {prefix}")
|
|
52
|
-
values["prefix"] = resource.prefix
|
|
53
|
-
values["identifier"] = resource.standardize_identifier(identifier)
|
|
54
|
-
if GLOBAL_CHECK_IDS and not resource.is_valid_identifier(values["identifier"]):
|
|
55
|
-
raise ValueError(f"non-standard identifier: {resource.prefix}:{values['identifier']}")
|
|
56
|
-
return values
|
|
57
|
-
|
|
58
|
-
@classmethod
|
|
59
|
-
def auto(cls, prefix: str, identifier: str) -> "Reference":
|
|
60
|
-
"""Create a reference and autopopulate its name."""
|
|
61
|
-
from ..api import get_name
|
|
62
|
-
|
|
63
|
-
name = get_name(prefix, identifier)
|
|
64
|
-
return cls.model_validate({"prefix": prefix, "identifier": identifier, "name": name})
|
|
65
|
-
|
|
66
|
-
@property
|
|
67
|
-
def bioregistry_link(self) -> str:
|
|
68
|
-
"""Get the bioregistry link."""
|
|
69
|
-
return f"https://bioregistry.io/{self.curie}"
|
|
70
|
-
|
|
71
|
-
@classmethod
|
|
72
|
-
def from_curie(
|
|
73
|
-
cls,
|
|
74
|
-
curie: str,
|
|
75
|
-
name: Optional[str] = None,
|
|
76
|
-
*,
|
|
77
|
-
strict: bool = True,
|
|
78
|
-
auto: bool = False,
|
|
79
|
-
) -> Optional["Reference"]:
|
|
80
|
-
"""Get a reference from a CURIE.
|
|
81
|
-
|
|
82
|
-
:param curie: The compact URI (CURIE) to parse in the form of `<prefix>:<identifier>`
|
|
83
|
-
:param name: The name associated with the CURIE
|
|
84
|
-
:param strict: If true, raises an error if the CURIE can not be parsed.
|
|
85
|
-
:param auto: Automatically look up name
|
|
86
|
-
"""
|
|
87
|
-
prefix, identifier = normalize_curie(curie, strict=strict)
|
|
88
|
-
return cls._materialize(prefix=prefix, identifier=identifier, name=name, auto=auto)
|
|
89
|
-
|
|
90
|
-
@classmethod
|
|
91
|
-
def from_iri(
|
|
92
|
-
cls,
|
|
93
|
-
iri: str,
|
|
94
|
-
name: Optional[str] = None,
|
|
95
|
-
*,
|
|
96
|
-
auto: bool = False,
|
|
97
|
-
) -> Optional["Reference"]:
|
|
98
|
-
"""Get a reference from an IRI using the Bioregistry.
|
|
99
|
-
|
|
100
|
-
:param iri: The IRI to parse
|
|
101
|
-
:param name: The name associated with the CURIE
|
|
102
|
-
:param auto: Automatically look up name
|
|
103
|
-
"""
|
|
104
|
-
prefix, identifier = bioregistry.parse_iri(iri)
|
|
105
|
-
return cls._materialize(prefix=prefix, identifier=identifier, name=name, auto=auto)
|
|
106
|
-
|
|
107
|
-
@classmethod
|
|
108
|
-
def _materialize(
|
|
109
|
-
cls,
|
|
110
|
-
prefix: Optional[str],
|
|
111
|
-
identifier: Optional[str],
|
|
112
|
-
name: Optional[str] = None,
|
|
113
|
-
*,
|
|
114
|
-
auto: bool = False,
|
|
115
|
-
) -> Optional["Reference"]:
|
|
116
|
-
if prefix is None or identifier is None:
|
|
36
|
+
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _parse_str_or_curie_or_uri(
|
|
40
|
+
str_curie_or_uri: str,
|
|
41
|
+
name: str | None = None,
|
|
42
|
+
*,
|
|
43
|
+
strict: bool = False,
|
|
44
|
+
ontology_prefix: str | None = None,
|
|
45
|
+
node: Reference | None = None,
|
|
46
|
+
predicate: Reference | None = None,
|
|
47
|
+
line: str | None = None,
|
|
48
|
+
context: str | None = None,
|
|
49
|
+
upgrade: bool = False,
|
|
50
|
+
) -> Reference | None:
|
|
51
|
+
reference = _parse_str_or_curie_or_uri_helper(
|
|
52
|
+
str_curie_or_uri,
|
|
53
|
+
ontology_prefix=ontology_prefix,
|
|
54
|
+
name=name,
|
|
55
|
+
node=node,
|
|
56
|
+
predicate=predicate,
|
|
57
|
+
line=line,
|
|
58
|
+
context=context,
|
|
59
|
+
upgrade=upgrade,
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
match reference:
|
|
63
|
+
case Reference():
|
|
64
|
+
return reference
|
|
65
|
+
case BlocklistError():
|
|
117
66
|
return None
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
def __str__(self):
|
|
127
|
-
identifier_lower = self.identifier.lower()
|
|
128
|
-
if identifier_lower.startswith(f"{self.prefix.lower()}:"):
|
|
129
|
-
rv = identifier_lower
|
|
130
|
-
else:
|
|
131
|
-
rv = f"{self.preferred_prefix}:{self._escaped_identifier}"
|
|
132
|
-
if self.name:
|
|
133
|
-
rv = f"{rv} ! {self.name}"
|
|
134
|
-
return rv
|
|
135
|
-
|
|
136
|
-
def __hash__(self):
|
|
137
|
-
return hash((self.__class__, self.prefix, self.identifier))
|
|
67
|
+
case ParseError():
|
|
68
|
+
if strict:
|
|
69
|
+
raise reference
|
|
70
|
+
else:
|
|
71
|
+
return None
|
|
72
|
+
case _:
|
|
73
|
+
raise TypeError(f"Got invalid: ({type(reference)}) {reference}")
|
|
138
74
|
|
|
139
75
|
|
|
140
76
|
class Referenced:
|
|
@@ -142,6 +78,19 @@ class Referenced:
|
|
|
142
78
|
|
|
143
79
|
reference: Reference
|
|
144
80
|
|
|
81
|
+
def __hash__(self) -> int:
|
|
82
|
+
return self.reference.__hash__()
|
|
83
|
+
|
|
84
|
+
def __eq__(self, other: Any) -> bool:
    """Compare by prefix and identifier.

    :param other: Any object. References (``curies.Reference``) and other
        ``Referenced`` objects are compared on their prefix and identifier.
    :returns: Whether both sides have the same prefix and identifier, or
        ``NotImplemented`` for any other type so that Python falls back to
        the other operand and finally to identity comparison.
    """
    if isinstance(other, curies.Reference | Referenced):
        return self.prefix == other.prefix and self.identifier == other.identifier
    # Returning NotImplemented (instead of raising) keeps ``==``, ``!=``, and
    # membership tests such as ``x in some_list`` usable when the other
    # operand is an unrelated object like ``None`` or a string
    return NotImplemented
|
|
88
|
+
|
|
89
|
+
def __lt__(self, other: curies.Reference | Referenced) -> bool:
    """Order by the wrapped reference.

    :param other: Either another ``Referenced`` object or a plain reference
    :returns: Whether this object's reference sorts before the other one
    :raises TypeError: If ``other`` is neither a reference nor a ``Referenced``
    """
    if not isinstance(other, curies.Reference | Referenced):
        raise TypeError
    # Only ``Referenced`` objects wrap a reference in ``.reference``; a plain
    # reference is already the thing to compare against
    other_reference = other.reference if isinstance(other, Referenced) else other
    return self.reference < other_reference
|
|
93
|
+
|
|
145
94
|
@property
|
|
146
95
|
def prefix(self):
|
|
147
96
|
"""The prefix of the typedef."""
|
|
@@ -163,16 +112,262 @@ class Referenced:
|
|
|
163
112
|
return self.reference.curie
|
|
164
113
|
|
|
165
114
|
@property
|
|
166
|
-
def
|
|
167
|
-
"""The preferred CURIE for this typedef."""
|
|
168
|
-
return self.reference.preferred_curie
|
|
169
|
-
|
|
170
|
-
@property
|
|
171
|
-
def pair(self) -> tuple[str, str]:
|
|
115
|
+
def pair(self) -> ReferenceTuple:
|
|
172
116
|
"""The pair of namespace/identifier."""
|
|
173
117
|
return self.reference.pair
|
|
174
118
|
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
119
|
+
|
|
120
|
+
def get_preferred_prefix(
    ref: curies.Reference | Reference | Referenced,
) -> str:
    """Get the preferred prefix from a variety of types.

    :param ref: A plain ``curies`` reference, a normalized reference, or a
        ``Referenced`` object
    :returns: For normalized references and ``Referenced`` objects, the
        preferred prefix reported by the Bioregistry, falling back to the
        object's own prefix when none is reported. For a plain ``curies``
        reference, its prefix unchanged.
    """
    # The normalized types are checked first, before the plain curies type
    if isinstance(ref, (Referenced, Reference)):
        preferred = bioregistry.get_preferred_prefix(ref.prefix)
        return preferred if preferred else ref.prefix
    if isinstance(ref, curies.Reference):
        return ref.prefix
    # Any other type is not handled and yields None
    return None
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def get_preferred_curie(
    ref: curies.Reference | Reference | Referenced,
) -> str:
    """Get the preferred CURIE from a variety of types.

    :param ref: A plain ``curies`` reference, a normalized reference, or a
        ``Referenced`` object
    :returns: For normalized references and ``Referenced`` objects, a CURIE
        built from the preferred prefix and the identifier. For a plain
        ``curies`` reference, its own CURIE unchanged.
    """
    # The normalized types are checked first, before the plain curies type
    if isinstance(ref, (Referenced, Reference)):
        preferred_prefix = get_preferred_prefix(ref)
        return f"{preferred_prefix}:{ref.identifier}"
    if isinstance(ref, curies.Reference):
        return ref.curie
    # Any other type is not handled and yields None
    return None
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def default_reference(prefix: str, identifier: str, name: str | None = None) -> Reference:
    """Create a reference for an "unqualified" identifier.

    The result lives in the ``obo`` prefix and its identifier has the form
    ``<prefix>#<identifier>``. For example, ``default_reference("chebi",
    "conjugate_base_of")`` yields the reference ``obo:chebi#conjugate_base_of``.

    :param prefix: The prefix of the ontology in which the "unqualified" reference is made
    :param identifier: The "unqualified" reference. For example, if you just write
        "located_in" somewhere there is supposed to be a CURIE
    :param name: An optional name to attach to the reference
    :returns: A reference for the "unqualified" identifier based on the OBO semantic space
    :raises ValueError: If the identifier is empty or consists only of whitespace
    """
    if identifier.strip() == "":
        raise ValueError("default identifier is empty")
    local_identifier = f"{prefix}#{identifier}"
    return Reference(prefix="obo", identifier=local_identifier, name=name)
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def _get_ref_name(reference: curies.Reference | Referenced) -> str | None:
    """Return the name of a namable reference or ``Referenced`` object, else ``None``."""
    has_name = isinstance(reference, curies.NamableReference | Referenced)
    return reference.name if has_name else None
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def reference_escape(
    reference: curies.Reference | Referenced,
    *,
    ontology_prefix: str,
    add_name_comment: bool = False,
) -> str:
    """Write a reference with the default namespace removed.

    :param reference: The reference to write
    :param ontology_prefix: The prefix of the ontology being written. References of
        the form ``obo:<ontology_prefix>#<local>`` are written as just ``<local>``.
    :param add_name_comment: If true and the reference has a name, append
        `` ! <name>`` to the preferred CURIE. Not applied to default-namespace references.
    :returns: The escaped text for the reference
    """
    default_namespace = f"{ontology_prefix}#"
    is_default = reference.prefix == "obo" and reference.identifier.startswith(default_namespace)
    if is_default:
        return reference.identifier.removeprefix(default_namespace)

    curie = get_preferred_curie(reference)
    if not add_name_comment:
        return curie
    name = _get_ref_name(reference)
    if not name:
        return curie
    return curie + f" ! {name}"
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def multi_reference_escape(
    references: Sequence[Reference | Referenced],
    *,
    ontology_prefix: str,
    add_name_comment: bool = False,
) -> str:
    """Write multiple references with the default namespace normalized.

    :param references: The references to write, in order
    :param ontology_prefix: The prefix of the ontology being written, forwarded
        to ``reference_escape``
    :param add_name_comment: If true, append a single `` ! <name> <name> ...``
        comment, but only when there is at least one reference and every
        reference has a name
    :returns: The space-separated escaped references, optionally followed by
        the name comment. An empty sequence gives an empty string.
    """
    rv = " ".join(
        reference_escape(r, ontology_prefix=ontology_prefix, add_name_comment=False)
        for r in references
    )
    if not add_name_comment:
        return rv
    names = [r.name for r in references]
    # ``all([])`` is true, so the emptiness check is needed to avoid emitting a
    # dangling " ! " when there are no references at all
    if names and all(names):
        rv += " ! " + " ".join(names)
    return rv
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def comma_separate_references(elements: Iterable[Reference | OBOLiteral]) -> str:
    """Convert each element to a string and join the results with ``", "``."""
    strings = []
    for element in elements:
        strings.append(reference_or_literal_to_str(element))
    return ", ".join(strings)
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def _obo_parse_identifier(
    str_or_curie_or_uri: str,
    *,
    ontology_prefix: str,
    strict: bool = False,
    node: Reference | None = None,
    predicate: Reference | None = None,
    line: str | None = None,
    context: str | None = None,
    name: str | None = None,
    upgrade: bool = True,
    counter: Counter[tuple[str, str]] | None = None,
) -> Reference | None:
    """Parse from a CURIE, URI, or default string in the ontology prefix's IDspace using OBO semantics.

    :param str_or_curie_or_uri: The text to parse
    :param ontology_prefix: The prefix of the ontology being parsed; also used to
        build a default reference when the text is a bare, valid identifier
    :param strict: If true, re-raise parse errors instead of returning ``None``
    :param node: Forwarded to the parsing helper
    :param predicate: Forwarded to the parsing helper
    :param line: Forwarded to the parsing helper
    :param context: Forwarded to the parsing helper
    :param name: Forwarded to the parsing helper
    :param upgrade: Forwarded to the parsing helper
    :param counter: Optional tally of failures keyed by (ontology prefix, text).
        When given, a warning is logged only the first time a key fails.
    :returns: The parsed reference, or ``None`` if the text is blocklisted or
        could not be parsed in non-strict mode
    """
    result = _parse_str_or_curie_or_uri_helper(
        str_or_curie_or_uri,
        ontology_prefix=ontology_prefix,
        node=node,
        predicate=predicate,
        line=line,
        context=context,
        name=name,
        upgrade=upgrade,
    )

    if isinstance(result, Reference):
        return result

    if isinstance(result, BlocklistError):
        return None

    if isinstance(result, NotCURIEError):
        # this means there's no colon `:`
        if _is_valid_identifier(str_or_curie_or_uri):
            return default_reference(prefix=ontology_prefix, identifier=str_or_curie_or_uri)
        if strict:
            raise result
        return None

    if isinstance(result, ParseError):
        if strict:
            raise result
        if counter is None:
            logger.warning(str(result))
        else:
            key = (ontology_prefix, str_or_curie_or_uri)
            # Warn on the first failure for this key, but count every failure
            if not counter[key]:
                logger.warning(str(result))
            counter[key] += 1
        return None

    # Anything else returned by the helper is not handled here
    return None
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def _parse_reference_or_uri_literal(
    str_or_curie_or_uri: str,
    *,
    ontology_prefix: str,
    strict: bool = False,
    node: Reference,
    predicate: Reference | None = None,
    line: str,
    context: str,
    name: str | None = None,
    upgrade: bool = True,
    #
    counter: Counter[tuple[str, str]] | None = None,
) -> None | Reference | OBOLiteral:
    """Parse text into a reference, falling back to a URI literal for unparsable IRIs.

    :param str_or_curie_or_uri: The text to parse
    :param ontology_prefix: The prefix of the ontology being parsed; also used to
        build a default reference when the text is a bare, valid identifier
    :param strict: If true, re-raise parse errors instead of returning ``None``
    :param node: Forwarded to the parsing helper
    :param predicate: Forwarded to the parsing helper
    :param line: Forwarded to the parsing helper
    :param context: Forwarded to the parsing helper
    :param name: Forwarded to the parsing helper
    :param upgrade: Forwarded to the parsing helper
    :param counter: Optional tally of failures keyed by (ontology prefix, text).
        When given, a warning is logged only the first time a key fails.
    :returns: The parsed reference, an ``xsd:anyURI`` literal when the text is an
        IRI that could not be parsed, or ``None`` if the text is blocklisted or
        could not be parsed in non-strict mode
    """
    result = _parse_str_or_curie_or_uri_helper(
        str_or_curie_or_uri,
        node=node,
        predicate=predicate,
        ontology_prefix=ontology_prefix,
        line=line,
        context=context,
        name=name,
        upgrade=upgrade,
    )

    if isinstance(result, Reference):
        return result

    if isinstance(result, BlocklistError):
        return None

    if isinstance(result, UnparsableIRIError):
        # this means that it's definitely a URI,
        # but it couldn't be parsed with Bioregistry
        return OBOLiteral.uri(str_or_curie_or_uri)

    if isinstance(result, NotCURIEError):
        # this means there's no colon `:`
        if _is_valid_identifier(str_or_curie_or_uri):
            return default_reference(prefix=ontology_prefix, identifier=str_or_curie_or_uri)
        if strict:
            raise result
        return None

    if isinstance(result, ParseError):
        if strict:
            raise result
        if counter is None:
            logger.warning(str(result))
        else:
            key = (ontology_prefix, str_or_curie_or_uri)
            # Warn on the first failure for this key, but count every failure
            if not counter[key]:
                logger.warning(str(result))
            counter[key] += 1
        return None

    # Anything else returned by the helper is not handled here
    return None
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
# Shared reference to ``semapv:UnspecifiedMatching`` ("unspecified matching process").
unspecified_matching = Reference(
    prefix="semapv", identifier="UnspecifiedMatching", name="unspecified matching process"
)
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
class OBOLiteral(NamedTuple):
    """A tuple representing a property with a literal value.

    The alternate constructors below each pair the text form of a value with
    the matching ``xsd`` datatype reference.
    """

    # The literal's value, always stored as text
    value: str
    # The datatype of the literal
    datatype: curies.Reference
    # Optional language; only the ``string`` constructor sets it
    language: str | None

    @classmethod
    def string(cls, value: str, *, language: str | None = None) -> OBOLiteral:
        """Get a string literal, optionally tagged with a language."""
        datatype = curies.Reference(prefix="xsd", identifier="string")
        return cls(value=value, datatype=datatype, language=language)

    @classmethod
    def boolean(cls, value: bool) -> OBOLiteral:
        """Get a boolean literal, written in lowercase (``true``/``false``)."""
        datatype = curies.Reference(prefix="xsd", identifier="boolean")
        return cls(value=str(value).lower(), datatype=datatype, language=None)

    @classmethod
    def decimal(cls, value) -> OBOLiteral:
        """Get a decimal literal from the ``str()`` form of the value."""
        datatype = curies.Reference(prefix="xsd", identifier="decimal")
        return cls(value=str(value), datatype=datatype, language=None)

    @classmethod
    def float(cls, value) -> OBOLiteral:
        """Get a float literal from the ``str()`` form of the value."""
        datatype = curies.Reference(prefix="xsd", identifier="float")
        return cls(value=str(value), datatype=datatype, language=None)

    @classmethod
    def integer(cls, value: int | str) -> OBOLiteral:
        """Get an integer literal; the value is passed through ``int()`` first."""
        datatype = curies.Reference(prefix="xsd", identifier="integer")
        return cls(value=str(int(value)), datatype=datatype, language=None)

    @classmethod
    def year(cls, value: int | str) -> OBOLiteral:
        """Get a year (gYear) literal; the value is passed through ``int()`` first."""
        datatype = curies.Reference(prefix="xsd", identifier="gYear")
        return cls(value=str(int(value)), datatype=datatype, language=None)

    @classmethod
    def uri(cls, uri: str) -> OBOLiteral:
        """Get a literal for a URI, typed as ``xsd:anyURI``."""
        datatype = curies.Reference(prefix="xsd", identifier="anyURI")
        return cls(value=uri, datatype=datatype, language=None)

    @classmethod
    def datetime(cls, dt: datetime.datetime | str) -> OBOLiteral:
        """Get a datetime literal in ISO format; text input is parsed first."""
        parsed = _parse_datetime(dt) if isinstance(dt, str) else dt
        datatype = curies.Reference(prefix="xsd", identifier="dateTime")
        return cls(value=parsed.isoformat(), datatype=datatype, language=None)
|
|
354
|
+
|
|
355
|
+
|
|
356
|
+
def _parse_datetime(dd: str) -> datetime.datetime:
    """Parse date/time text and convert the result to UTC.

    NOTE(review): ``astimezone`` treats a datetime without timezone information
    as local time, so text lacking an offset is presumably converted relative
    to the machine's timezone - confirm this is intended.
    """
    return dateutil.parser.parse(dd).astimezone(pytz.UTC)
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
def _reference_list_tag(
    tag: str, references: Iterable[Reference], ontology_prefix: str
) -> Iterable[str]:
    """Yield one ``<tag>: <reference>`` line per reference, with name comments enabled."""
    for reference in references:
        escaped = reference_escape(
            reference, ontology_prefix=ontology_prefix, add_name_comment=True
        )
        yield f"{tag}: {escaped}"
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
def reference_or_literal_to_str(x: OBOLiteral | curies.Reference | Reference | Referenced) -> str:
    """Get a string from a reference or literal.

    :param x: A literal, a reference, or a ``Referenced`` object
    :returns: The literal's value, or the preferred CURIE for anything else
    """
    return x.value if isinstance(x, OBOLiteral) else get_preferred_curie(x)
|