obographs 0.0.1__py3-none-any.whl → 0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- obographs/model.py +19 -6
- obographs/standardized.py +210 -0
- obographs/version.py +1 -1
- {obographs-0.0.1.dist-info → obographs-0.0.2.dist-info}/METADATA +20 -4
- obographs-0.0.2.dist-info/RECORD +10 -0
- obographs-0.0.1.dist-info/RECORD +0 -9
- {obographs-0.0.1.dist-info → obographs-0.0.2.dist-info}/WHEEL +0 -0
- {obographs-0.0.1.dist-info → obographs-0.0.2.dist-info}/entry_points.txt +0 -0
- {obographs-0.0.1.dist-info → obographs-0.0.2.dist-info}/licenses/LICENSE +0 -0
obographs/model.py
CHANGED
|
@@ -13,10 +13,15 @@ import json
|
|
|
13
13
|
import logging
|
|
14
14
|
from collections import defaultdict
|
|
15
15
|
from pathlib import Path
|
|
16
|
-
from typing import Any, Literal, TypeAlias, overload
|
|
16
|
+
from typing import TYPE_CHECKING, Any, Literal, TypeAlias, overload
|
|
17
17
|
|
|
18
18
|
from pydantic import BaseModel, Field
|
|
19
19
|
|
|
20
|
+
if TYPE_CHECKING:
|
|
21
|
+
import curies
|
|
22
|
+
|
|
23
|
+
from .standardized import StandardizedGraph
|
|
24
|
+
|
|
20
25
|
__all__ = [
|
|
21
26
|
"Definition",
|
|
22
27
|
"Edge",
|
|
@@ -58,10 +63,10 @@ OBO_SYNONYM_TO_OIO: dict[str, SynonymPredicate] = {
|
|
|
58
63
|
class Property(BaseModel):
|
|
59
64
|
"""Represent a property inside a metadata element."""
|
|
60
65
|
|
|
61
|
-
pred: str
|
|
62
|
-
val: str
|
|
63
|
-
|
|
64
|
-
|
|
66
|
+
pred: str
|
|
67
|
+
val: str
|
|
68
|
+
xrefs: list[str] | None = None
|
|
69
|
+
meta: Meta | None = None
|
|
65
70
|
|
|
66
71
|
|
|
67
72
|
class Definition(BaseModel):
|
|
@@ -74,7 +79,7 @@ class Definition(BaseModel):
|
|
|
74
79
|
class Xref(BaseModel):
|
|
75
80
|
"""Represents a cross-reference."""
|
|
76
81
|
|
|
77
|
-
val: str
|
|
82
|
+
val: str
|
|
78
83
|
|
|
79
84
|
|
|
80
85
|
class Synonym(BaseModel):
|
|
@@ -87,6 +92,7 @@ class Synonym(BaseModel):
|
|
|
87
92
|
default_factory=list,
|
|
88
93
|
description="A list of CURIEs/IRIs for provenance for the synonym",
|
|
89
94
|
)
|
|
95
|
+
meta: Meta | None = None
|
|
90
96
|
|
|
91
97
|
|
|
92
98
|
class Meta(BaseModel):
|
|
@@ -132,11 +138,18 @@ class Graph(BaseModel):
|
|
|
132
138
|
domainRangeAxioms: list[Any] = Field(default_factory=list) # noqa:N815
|
|
133
139
|
propertyChainAxioms: list[Any] = Field(default_factory=list) # noqa:N815
|
|
134
140
|
|
|
141
|
+
def standardize(self, converter: curies.Converter) -> StandardizedGraph:
|
|
142
|
+
"""Standardize the graph."""
|
|
143
|
+
from .standardized import StandardizedGraph
|
|
144
|
+
|
|
145
|
+
return StandardizedGraph.from_obograph_raw(self, converter)
|
|
146
|
+
|
|
135
147
|
|
|
136
148
|
class GraphDocument(BaseModel):
|
|
137
149
|
"""Represents a list of OBO graphs."""
|
|
138
150
|
|
|
139
151
|
graphs: list[Graph]
|
|
152
|
+
meta: Meta | None = None
|
|
140
153
|
|
|
141
154
|
|
|
142
155
|
def get_id_to_node(graph: Graph) -> dict[str, Node]:
|
|
obographs/standardized.py
ADDED
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
"""Standardize an OBO graph."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from curies import Converter, Reference
|
|
6
|
+
from pydantic import BaseModel, Field
|
|
7
|
+
from typing_extensions import Self
|
|
8
|
+
|
|
9
|
+
from obographs.model import Definition, Edge, Graph, Meta, Node, NodeType, Property, Synonym, Xref
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"StandardizedDefinition",
|
|
13
|
+
"StandardizedEdge",
|
|
14
|
+
"StandardizedGraph",
|
|
15
|
+
"StandardizedMeta",
|
|
16
|
+
"StandardizedNode",
|
|
17
|
+
"StandardizedXref",
|
|
18
|
+
]
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class StandardizedProperty(BaseModel):
|
|
22
|
+
"""A standardized property."""
|
|
23
|
+
|
|
24
|
+
predicate: Reference
|
|
25
|
+
value: Reference
|
|
26
|
+
xrefs: list[Reference] | None = None
|
|
27
|
+
meta: StandardizedMeta | None = None
|
|
28
|
+
|
|
29
|
+
@classmethod
|
|
30
|
+
def from_obograph_raw(cls, prop: Property, converter: Converter) -> Self:
|
|
31
|
+
"""Instantiate by standardizing a raw OBO Graph object."""
|
|
32
|
+
return cls(
|
|
33
|
+
predicate=_curie_or_uri_to_ref(prop.pred, converter),
|
|
34
|
+
value=_curie_or_uri_to_ref(prop.val, converter),
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class StandardizedDefinition(BaseModel):
|
|
39
|
+
"""A standardized definition."""
|
|
40
|
+
|
|
41
|
+
value: str | None = Field(default=None)
|
|
42
|
+
xrefs: list[Reference] | None = Field(default=None)
|
|
43
|
+
|
|
44
|
+
@classmethod
|
|
45
|
+
def from_obograph_raw(cls, definition: Definition | None, converter: Converter) -> Self | None:
|
|
46
|
+
"""Instantiate by standardizing a raw OBO Graph object."""
|
|
47
|
+
if definition is None:
|
|
48
|
+
return None
|
|
49
|
+
return cls(
|
|
50
|
+
value=definition.val,
|
|
51
|
+
xrefs=_parse_list(definition.xrefs, converter),
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class StandardizedXref(BaseModel):
|
|
56
|
+
"""A standardized database cross-reference."""
|
|
57
|
+
|
|
58
|
+
reference: Reference
|
|
59
|
+
|
|
60
|
+
@classmethod
|
|
61
|
+
def from_obograph_raw(cls, xref: Xref, converter: Converter) -> Self:
|
|
62
|
+
"""Instantiate by standardizing a raw OBO Graph object."""
|
|
63
|
+
return cls(reference=_curie_or_uri_to_ref(xref.val, converter))
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class StandardizedSynonym(BaseModel):
|
|
67
|
+
"""A standardized synonym."""
|
|
68
|
+
|
|
69
|
+
text: str
|
|
70
|
+
predicate: Reference
|
|
71
|
+
type: Reference | None = None
|
|
72
|
+
xrefs: list[Reference] | None = None
|
|
73
|
+
|
|
74
|
+
@classmethod
|
|
75
|
+
def from_obograph_raw(cls, synonym: Synonym, converter: Converter) -> Self:
|
|
76
|
+
"""Instantiate by standardizing a raw OBO Graph object."""
|
|
77
|
+
return cls(
|
|
78
|
+
text=synonym.val,
|
|
79
|
+
predicate=Reference(prefix="oboInOwl", identifier=synonym.pred),
|
|
80
|
+
type=synonym.synonymType and _curie_or_uri_to_ref(synonym.synonymType, converter),
|
|
81
|
+
xrefs=_parse_list(synonym.xrefs, converter),
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class StandardizedMeta(BaseModel):
|
|
86
|
+
"""A standardized meta object."""
|
|
87
|
+
|
|
88
|
+
definition: StandardizedDefinition | None
|
|
89
|
+
subsets: list[Reference] | None = None
|
|
90
|
+
xrefs: list[StandardizedXref] | None = None
|
|
91
|
+
synonyms: list[StandardizedSynonym] | None = None
|
|
92
|
+
comments: list[str] | None = None
|
|
93
|
+
deprecated: bool = False
|
|
94
|
+
version: str | None = None
|
|
95
|
+
properties: list[StandardizedProperty] | None = None
|
|
96
|
+
|
|
97
|
+
@classmethod
|
|
98
|
+
def from_obograph_raw(cls, meta: Meta | None, converter: Converter) -> Self | None:
|
|
99
|
+
"""Instantiate by standardizing a raw OBO Graph object."""
|
|
100
|
+
if meta is None:
|
|
101
|
+
return None
|
|
102
|
+
return cls(
|
|
103
|
+
definition=StandardizedDefinition.from_obograph_raw(meta.definition, converter),
|
|
104
|
+
subsets=[_curie_or_uri_to_ref(subset, converter) for subset in meta.subsets]
|
|
105
|
+
if meta.subsets
|
|
106
|
+
else None,
|
|
107
|
+
xrefs=[StandardizedXref.from_obograph_raw(xref, converter) for xref in meta.xrefs]
|
|
108
|
+
if meta.xrefs
|
|
109
|
+
else None,
|
|
110
|
+
synonyms=[
|
|
111
|
+
StandardizedSynonym.from_obograph_raw(synonym, converter)
|
|
112
|
+
for synonym in meta.synonyms
|
|
113
|
+
]
|
|
114
|
+
if meta.synonyms
|
|
115
|
+
else None,
|
|
116
|
+
comments=meta.comments,
|
|
117
|
+
version=meta.version,
|
|
118
|
+
deprecated=meta.deprecated,
|
|
119
|
+
properties=[
|
|
120
|
+
StandardizedProperty.from_obograph_raw(p, converter)
|
|
121
|
+
for p in meta.basicPropertyValues
|
|
122
|
+
]
|
|
123
|
+
if meta.basicPropertyValues
|
|
124
|
+
else None,
|
|
125
|
+
)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
class StandardizedNode(BaseModel):
|
|
129
|
+
"""A standardized node."""
|
|
130
|
+
|
|
131
|
+
reference: Reference
|
|
132
|
+
label: str | None = Field(None)
|
|
133
|
+
meta: StandardizedMeta | None = None
|
|
134
|
+
type: NodeType = Field(..., description="Type of node")
|
|
135
|
+
|
|
136
|
+
@classmethod
|
|
137
|
+
def from_obograph_raw(cls, node: Node, converter: Converter) -> Self:
|
|
138
|
+
"""Instantiate by standardizing a raw OBO Graph object."""
|
|
139
|
+
return cls(
|
|
140
|
+
reference=_curie_or_uri_to_ref(node.id, converter),
|
|
141
|
+
label=node.lbl,
|
|
142
|
+
meta=StandardizedMeta.from_obograph_raw(node.meta, converter),
|
|
143
|
+
type=node.type,
|
|
144
|
+
)
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
class StandardizedEdge(BaseModel):
|
|
148
|
+
"""A standardized edge."""
|
|
149
|
+
|
|
150
|
+
subject: Reference
|
|
151
|
+
predicate: Reference
|
|
152
|
+
object: Reference
|
|
153
|
+
meta: StandardizedMeta | None = None
|
|
154
|
+
|
|
155
|
+
@classmethod
|
|
156
|
+
def from_obograph_raw(cls, node: Edge, converter: Converter) -> Self:
|
|
157
|
+
"""Instantiate by standardizing a raw OBO Graph object."""
|
|
158
|
+
return cls(
|
|
159
|
+
subject=_curie_or_uri_to_ref(node.sub, converter),
|
|
160
|
+
predicate=_curie_or_uri_to_ref(node.pred, converter),
|
|
161
|
+
object=_curie_or_uri_to_ref(node.obj, converter),
|
|
162
|
+
meta=StandardizedMeta.from_obograph_raw(node.meta, converter),
|
|
163
|
+
)
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
class StandardizedGraph(BaseModel):
|
|
167
|
+
"""A standardized graph."""
|
|
168
|
+
|
|
169
|
+
id: str | None = None
|
|
170
|
+
meta: StandardizedMeta | None = None
|
|
171
|
+
nodes: list[StandardizedNode] = Field(default_factory=list)
|
|
172
|
+
edges: list[StandardizedEdge] = Field(default_factory=list)
|
|
173
|
+
|
|
174
|
+
# TODO other bits
|
|
175
|
+
|
|
176
|
+
@classmethod
|
|
177
|
+
def from_obograph_raw(cls, graph: Graph, converter: Converter) -> Self:
|
|
178
|
+
"""Instantiate by standardizing a raw OBO Graph object."""
|
|
179
|
+
return cls(
|
|
180
|
+
id=graph.id,
|
|
181
|
+
meta=StandardizedMeta.from_obograph_raw(graph.meta, converter),
|
|
182
|
+
nodes=[StandardizedNode.from_obograph_raw(node, converter) for node in graph.nodes],
|
|
183
|
+
edges=[StandardizedEdge.from_obograph_raw(edge, converter) for edge in graph.edges],
|
|
184
|
+
)
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def _parse_list(ss: list[str] | None, converter: Converter) -> list[Reference] | None:
|
|
188
|
+
if not ss:
|
|
189
|
+
return None
|
|
190
|
+
return [_curie_or_uri_to_ref(x, converter) for x in ss]
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
#: defined in https://github.com/geneontology/obographs/blob/6676b10a5cce04707d75b9dd46fa08de70322b0b/obographs-owlapi/src/main/java/org/geneontology/obographs/owlapi/FromOwl.java#L36-L39
|
|
194
|
+
BUILTINS = {
|
|
195
|
+
"is_a": Reference(prefix="rdfs", identifier="subClassOf"),
|
|
196
|
+
"subPropertyOf": Reference(prefix="rdfs", identifier="subPropertyOf"),
|
|
197
|
+
"type": Reference(prefix="rdf", identifier="type"),
|
|
198
|
+
"inverseOf": Reference(prefix="owl", identifier="inverseOf"),
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def _curie_or_uri_to_ref(s: str, converter: Converter) -> Reference:
|
|
203
|
+
if s in BUILTINS:
|
|
204
|
+
return BUILTINS[s]
|
|
205
|
+
if converter.is_uri(s):
|
|
206
|
+
p, o = converter.parse_uri(s)
|
|
207
|
+
return Reference(prefix=p, identifier=o)
|
|
208
|
+
elif converter.is_curie(s):
|
|
209
|
+
pass
|
|
210
|
+
raise ValueError(f"can't parse string: {s}")
|
obographs/version.py
CHANGED
{obographs-0.0.1.dist-info → obographs-0.0.2.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: obographs
|
|
3
|
-
Version: 0.0.1
|
|
3
|
+
Version: 0.0.2
|
|
4
4
|
Summary: A python data model for OBO Graphs
|
|
5
5
|
Keywords: snekpack,cookiecutter
|
|
6
6
|
Author: Charles Tapley Hoyt
|
|
@@ -23,6 +23,8 @@ Classifier: Programming Language :: Python :: 3.13
|
|
|
23
23
|
Classifier: Programming Language :: Python :: 3 :: Only
|
|
24
24
|
Classifier: Typing :: Typed
|
|
25
25
|
Requires-Dist: pydantic
|
|
26
|
+
Requires-Dist: curies
|
|
27
|
+
Requires-Dist: typing-extensions
|
|
26
28
|
Requires-Dist: sphinx>=8 ; extra == 'docs'
|
|
27
29
|
Requires-Dist: sphinx-rtd-theme>=3.0 ; extra == 'docs'
|
|
28
30
|
Requires-Dist: sphinx-automodapi ; extra == 'docs'
|
|
@@ -89,11 +91,25 @@ data model.
|
|
|
89
91
|
import obographs
|
|
90
92
|
|
|
91
93
|
url = "https://raw.githubusercontent.com/geneontology/obographs/refs/heads/master/examples/abox.json"
|
|
92
|
-
|
|
94
|
+
graph_raw = obographs.read(url)
|
|
93
95
|
```
|
|
94
96
|
|
|
95
|
-
|
|
96
|
-
|
|
97
|
+
The OBO Graph JSON schema uses non-Pythonic names, and it's inherently not aware
|
|
98
|
+
of semantics - it uses a combination of URIs and ad-hoc symbols as identifiers.
|
|
99
|
+
`obographs` implements a standardization workflow that creates new data
|
|
100
|
+
structures with parsed/normalized URIs and symbols that have Pythonic names. Use
|
|
101
|
+
it like:
|
|
102
|
+
|
|
103
|
+
```python
|
|
104
|
+
import curies
|
|
105
|
+
|
|
106
|
+
converter = curies.get_bioregistry_converter()
|
|
107
|
+
|
|
108
|
+
graph = graph_raw.standardize(converter)
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
Now, this graph object will have nice Pythonic names and references parsed as
|
|
112
|
+
`curies.Reference` objects.
|
|
97
113
|
|
|
98
114
|
## 🚀 Installation
|
|
99
115
|
|
|
obographs-0.0.2.dist-info/RECORD
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
obographs/version.py,sha256=04839f3ad0ad481ebb750f5228278ee8a55b26c17bd61c0a3f2ccc6da50204ae,961
|
|
2
|
+
obographs/__init__.py,sha256=93807a9cc6f4002ac923a84a8c5e1a9b6301c5368cd61a957c9115adfaef1a56,254
|
|
3
|
+
obographs/model.py,sha256=3387390f1a30fe47b22c7c346dfdf2b6a595ccbc7cd8d7d09debf4a13f8f6e6c,6846
|
|
4
|
+
obographs/py.typed,sha256=01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b,1
|
|
5
|
+
obographs/standardized.py,sha256=15a58dcb9af39a01565fdb096389caf2702f94052e933f9368d93caf620022f5,7099
|
|
6
|
+
obographs-0.0.2.dist-info/licenses/LICENSE,sha256=4be0ec343e3bf11fd54321a6b576d5616ebb7d18898f741f63c517209e33bcb2,1076
|
|
7
|
+
obographs-0.0.2.dist-info/WHEEL,sha256=e3765529bb0cc791d07188d72ec6a759d7625ff6d3a5e4b710d25409bae03770,79
|
|
8
|
+
obographs-0.0.2.dist-info/entry_points.txt,sha256=9a9819cedd2186e28d5d42ddce5e3de1417b0db2b07392ff35f9adc7c86a8619,50
|
|
9
|
+
obographs-0.0.2.dist-info/METADATA,sha256=3e577f9dcce8afd6b2b0694935c320b898e5253ba6f0dcf8520b4d1a996ef3a3,13429
|
|
10
|
+
obographs-0.0.2.dist-info/RECORD,,
|
obographs-0.0.1.dist-info/RECORD
DELETED
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
obographs/version.py,sha256=0ce3c0b54472caa26dc5eacd65d21c9f7a242a94f5f9ea4d972cf08c2775018b,961
|
|
2
|
-
obographs/__init__.py,sha256=93807a9cc6f4002ac923a84a8c5e1a9b6301c5368cd61a957c9115adfaef1a56,254
|
|
3
|
-
obographs/model.py,sha256=2032a2fb41761be360fbae3280ae5db570c12eb528bd3215c9d2e39494934ada,6441
|
|
4
|
-
obographs/py.typed,sha256=01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b,1
|
|
5
|
-
obographs-0.0.1.dist-info/licenses/LICENSE,sha256=4be0ec343e3bf11fd54321a6b576d5616ebb7d18898f741f63c517209e33bcb2,1076
|
|
6
|
-
obographs-0.0.1.dist-info/WHEEL,sha256=e3765529bb0cc791d07188d72ec6a759d7625ff6d3a5e4b710d25409bae03770,79
|
|
7
|
-
obographs-0.0.1.dist-info/entry_points.txt,sha256=9a9819cedd2186e28d5d42ddce5e3de1417b0db2b07392ff35f9adc7c86a8619,50
|
|
8
|
-
obographs-0.0.1.dist-info/METADATA,sha256=7a7a77f96ddea3e845896d0a37e44ef7a559187f7c01738c77d2fb1a881ffcb9,12972
|
|
9
|
-
obographs-0.0.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|