obographs 0.0.2__py3-none-any.whl → 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- obographs/__init__.py +20 -1
- obographs/model.py +8 -3
- obographs/standardized.py +128 -44
- obographs/version.py +1 -1
- {obographs-0.0.2.dist-info → obographs-0.0.3.dist-info}/METADATA +3 -3
- obographs-0.0.3.dist-info/RECORD +10 -0
- obographs-0.0.2.dist-info/RECORD +0 -10
- {obographs-0.0.2.dist-info → obographs-0.0.3.dist-info}/WHEEL +0 -0
- {obographs-0.0.2.dist-info → obographs-0.0.3.dist-info}/entry_points.txt +0 -0
- {obographs-0.0.2.dist-info → obographs-0.0.3.dist-info}/licenses/LICENSE +0 -0
obographs/__init__.py
CHANGED
|
@@ -1,13 +1,32 @@
|
|
|
1
1
|
"""A python data model for OBO Graphs."""
|
|
2
2
|
|
|
3
|
-
from .model import Graph, GraphDocument, Meta, Node, Property, Synonym, Xref, read
|
|
3
|
+
from .model import Graph, GraphDocument, Meta, Node, NodeType, Property, Synonym, Xref, read
|
|
4
|
+
from .standardized import (
|
|
5
|
+
StandardizedDefinition,
|
|
6
|
+
StandardizedEdge,
|
|
7
|
+
StandardizedGraph,
|
|
8
|
+
StandardizedMeta,
|
|
9
|
+
StandardizedNode,
|
|
10
|
+
StandardizedProperty,
|
|
11
|
+
StandardizedSynonym,
|
|
12
|
+
StandardizedXref,
|
|
13
|
+
)
|
|
4
14
|
|
|
5
15
|
__all__ = [
|
|
6
16
|
"Graph",
|
|
7
17
|
"GraphDocument",
|
|
8
18
|
"Meta",
|
|
9
19
|
"Node",
|
|
20
|
+
"NodeType",
|
|
10
21
|
"Property",
|
|
22
|
+
"StandardizedDefinition",
|
|
23
|
+
"StandardizedEdge",
|
|
24
|
+
"StandardizedGraph",
|
|
25
|
+
"StandardizedMeta",
|
|
26
|
+
"StandardizedNode",
|
|
27
|
+
"StandardizedProperty",
|
|
28
|
+
"StandardizedSynonym",
|
|
29
|
+
"StandardizedXref",
|
|
11
30
|
"Synonym",
|
|
12
31
|
"Xref",
|
|
13
32
|
"read",
|
obographs/model.py
CHANGED
|
@@ -64,7 +64,12 @@ class Property(BaseModel):
|
|
|
64
64
|
"""Represent a property inside a metadata element."""
|
|
65
65
|
|
|
66
66
|
pred: str
|
|
67
|
-
val: str
|
|
67
|
+
val: str | None = Field(
|
|
68
|
+
None,
|
|
69
|
+
description="Stores the value of the property. This can be a string representing a "
|
|
70
|
+
"literal or IRI. This isn't supposed to be nullable, but it happens a lot - might be a "
|
|
71
|
+
"bug in OWLAPI or ROBOT",
|
|
72
|
+
)
|
|
68
73
|
xrefs: list[str] | None = None
|
|
69
74
|
meta: Meta | None = None
|
|
70
75
|
|
|
@@ -87,7 +92,7 @@ class Synonym(BaseModel):
|
|
|
87
92
|
|
|
88
93
|
val: str | None = Field(default=None)
|
|
89
94
|
pred: str = Field(default="hasExactSynonym")
|
|
90
|
-
synonymType: str | None = Field(examples=["OMO:0003000"]) # noqa:N815
|
|
95
|
+
synonymType: str | None = Field(None, examples=["OMO:0003000"]) # noqa:N815
|
|
91
96
|
xrefs: list[str] = Field(
|
|
92
97
|
default_factory=list,
|
|
93
98
|
description="A list of CURIEs/IRIs for provenance for the synonym",
|
|
@@ -123,7 +128,7 @@ class Node(BaseModel):
|
|
|
123
128
|
id: str = Field(..., description="The IRI for the node")
|
|
124
129
|
lbl: str | None = Field(None, description="The name of the node")
|
|
125
130
|
meta: Meta | None = None
|
|
126
|
-
type: NodeType = Field(
|
|
131
|
+
type: NodeType | None = Field(None, description="Type of node")
|
|
127
132
|
|
|
128
133
|
|
|
129
134
|
class Graph(BaseModel):
|
obographs/standardized.py
CHANGED
|
@@ -2,7 +2,9 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
import logging
|
|
6
|
+
|
|
7
|
+
from curies import Converter, Reference, vocabulary
|
|
6
8
|
from pydantic import BaseModel, Field
|
|
7
9
|
from typing_extensions import Self
|
|
8
10
|
|
|
@@ -14,24 +16,39 @@ __all__ = [
|
|
|
14
16
|
"StandardizedGraph",
|
|
15
17
|
"StandardizedMeta",
|
|
16
18
|
"StandardizedNode",
|
|
19
|
+
"StandardizedProperty",
|
|
20
|
+
"StandardizedSynonym",
|
|
17
21
|
"StandardizedXref",
|
|
18
22
|
]
|
|
19
23
|
|
|
24
|
+
logger = logging.getLogger(__name__)
|
|
25
|
+
|
|
20
26
|
|
|
21
27
|
class StandardizedProperty(BaseModel):
|
|
22
28
|
"""A standardized property."""
|
|
23
29
|
|
|
24
30
|
predicate: Reference
|
|
25
|
-
value: Reference
|
|
31
|
+
value: Reference | str = Field(
|
|
32
|
+
..., description="Parsed into a Reference if a CURIE or IRI, or a string if it's a literal"
|
|
33
|
+
)
|
|
26
34
|
xrefs: list[Reference] | None = None
|
|
27
35
|
meta: StandardizedMeta | None = None
|
|
28
36
|
|
|
29
37
|
@classmethod
|
|
30
38
|
def from_obograph_raw(cls, prop: Property, converter: Converter) -> Self:
|
|
31
39
|
"""Instantiate by standardizing a raw OBO Graph object."""
|
|
40
|
+
if not prop.val or not prop.pred:
|
|
41
|
+
raise ValueError
|
|
42
|
+
value: Reference | str | None
|
|
43
|
+
if not prop.val.startswith("http://") and not prop.val.startswith("https"):
|
|
44
|
+
value = _curie_or_uri_to_ref(prop.val, converter)
|
|
45
|
+
else:
|
|
46
|
+
value = prop.val
|
|
47
|
+
if value is None:
|
|
48
|
+
raise ValueError
|
|
32
49
|
return cls(
|
|
33
50
|
predicate=_curie_or_uri_to_ref(prop.pred, converter),
|
|
34
|
-
value=
|
|
51
|
+
value=value,
|
|
35
52
|
)
|
|
36
53
|
|
|
37
54
|
|
|
@@ -95,33 +112,54 @@ class StandardizedMeta(BaseModel):
|
|
|
95
112
|
properties: list[StandardizedProperty] | None = None
|
|
96
113
|
|
|
97
114
|
@classmethod
|
|
98
|
-
def from_obograph_raw(
|
|
115
|
+
def from_obograph_raw( # noqa:C901
|
|
116
|
+
cls, meta: Meta | None, converter: Converter, flag: str = ""
|
|
117
|
+
) -> Self | None:
|
|
99
118
|
"""Instantiate by standardizing a raw OBO Graph object."""
|
|
100
119
|
if meta is None:
|
|
101
120
|
return None
|
|
121
|
+
|
|
122
|
+
xrefs = []
|
|
123
|
+
for raw_xref in meta.xrefs or []:
|
|
124
|
+
if raw_xref.val:
|
|
125
|
+
try:
|
|
126
|
+
st_xref = StandardizedXref.from_obograph_raw(raw_xref, converter)
|
|
127
|
+
except ValueError:
|
|
128
|
+
logger.debug("[%s] failed to standardize xref: %s", flag, raw_xref)
|
|
129
|
+
else:
|
|
130
|
+
xrefs.append(st_xref)
|
|
131
|
+
|
|
132
|
+
synonyms = []
|
|
133
|
+
for raw_synonym in meta.synonyms or []:
|
|
134
|
+
if raw_synonym.val:
|
|
135
|
+
try:
|
|
136
|
+
s = StandardizedSynonym.from_obograph_raw(raw_synonym, converter)
|
|
137
|
+
except ValueError:
|
|
138
|
+
logger.debug("[%s] failed to standardize synonym: %s", flag, raw_synonym)
|
|
139
|
+
else:
|
|
140
|
+
synonyms.append(s)
|
|
141
|
+
|
|
142
|
+
props = []
|
|
143
|
+
for raw_prop in meta.basicPropertyValues or []:
|
|
144
|
+
if raw_prop.val and raw_prop.pred:
|
|
145
|
+
try:
|
|
146
|
+
prop = StandardizedProperty.from_obograph_raw(raw_prop, converter)
|
|
147
|
+
except ValueError:
|
|
148
|
+
logger.debug("[%s] failed to standardize property: %s", flag, raw_prop)
|
|
149
|
+
else:
|
|
150
|
+
props.append(prop)
|
|
151
|
+
|
|
102
152
|
return cls(
|
|
103
153
|
definition=StandardizedDefinition.from_obograph_raw(meta.definition, converter),
|
|
104
154
|
subsets=[_curie_or_uri_to_ref(subset, converter) for subset in meta.subsets]
|
|
105
155
|
if meta.subsets
|
|
106
156
|
else None,
|
|
107
|
-
xrefs=
|
|
108
|
-
|
|
109
|
-
else None,
|
|
110
|
-
synonyms=[
|
|
111
|
-
StandardizedSynonym.from_obograph_raw(synonym, converter)
|
|
112
|
-
for synonym in meta.synonyms
|
|
113
|
-
]
|
|
114
|
-
if meta.synonyms
|
|
115
|
-
else None,
|
|
157
|
+
xrefs=xrefs or None,
|
|
158
|
+
synonyms=synonyms or None,
|
|
116
159
|
comments=meta.comments,
|
|
117
160
|
version=meta.version,
|
|
118
161
|
deprecated=meta.deprecated,
|
|
119
|
-
properties=
|
|
120
|
-
StandardizedProperty.from_obograph_raw(p, converter)
|
|
121
|
-
for p in meta.basicPropertyValues
|
|
122
|
-
]
|
|
123
|
-
if meta.basicPropertyValues
|
|
124
|
-
else None,
|
|
162
|
+
properties=props or None,
|
|
125
163
|
)
|
|
126
164
|
|
|
127
165
|
|
|
@@ -131,15 +169,19 @@ class StandardizedNode(BaseModel):
|
|
|
131
169
|
reference: Reference
|
|
132
170
|
label: str | None = Field(None)
|
|
133
171
|
meta: StandardizedMeta | None = None
|
|
134
|
-
type: NodeType = Field(
|
|
172
|
+
type: NodeType | None = Field(None, description="Type of node")
|
|
135
173
|
|
|
136
174
|
@classmethod
|
|
137
|
-
def from_obograph_raw(cls, node: Node, converter: Converter) -> Self:
|
|
175
|
+
def from_obograph_raw(cls, node: Node, converter: Converter) -> Self | None:
|
|
138
176
|
"""Instantiate by standardizing a raw OBO Graph object."""
|
|
177
|
+
reference = _curie_or_uri_to_ref(node.id, converter)
|
|
178
|
+
if reference is None:
|
|
179
|
+
logger.warning("failed to parse node's ID %s", node.id)
|
|
180
|
+
return None
|
|
139
181
|
return cls(
|
|
140
|
-
reference=
|
|
182
|
+
reference=reference,
|
|
141
183
|
label=node.lbl,
|
|
142
|
-
meta=StandardizedMeta.from_obograph_raw(node.meta, converter),
|
|
184
|
+
meta=StandardizedMeta.from_obograph_raw(node.meta, converter, flag=reference.curie),
|
|
143
185
|
type=node.type,
|
|
144
186
|
)
|
|
145
187
|
|
|
@@ -153,13 +195,27 @@ class StandardizedEdge(BaseModel):
|
|
|
153
195
|
meta: StandardizedMeta | None = None
|
|
154
196
|
|
|
155
197
|
@classmethod
|
|
156
|
-
def from_obograph_raw(cls,
|
|
198
|
+
def from_obograph_raw(cls, edge: Edge, converter: Converter) -> Self | None:
|
|
157
199
|
"""Instantiate by standardizing a raw OBO Graph object."""
|
|
200
|
+
subject = _curie_or_uri_to_ref(edge.sub, converter)
|
|
201
|
+
if not subject:
|
|
202
|
+
logger.warning("failed to parse edge's subject %s", edge.sub)
|
|
203
|
+
return None
|
|
204
|
+
predicate = _curie_or_uri_to_ref(edge.pred, converter)
|
|
205
|
+
if not predicate:
|
|
206
|
+
logger.warning("failed to parse edge's predicate %s", edge.pred)
|
|
207
|
+
return None
|
|
208
|
+
obj = _curie_or_uri_to_ref(edge.obj, converter)
|
|
209
|
+
if not obj:
|
|
210
|
+
logger.warning("failed to parse edge's object %s", edge.obj)
|
|
211
|
+
return None
|
|
158
212
|
return cls(
|
|
159
|
-
subject=
|
|
160
|
-
predicate=
|
|
161
|
-
object=
|
|
162
|
-
meta=StandardizedMeta.from_obograph_raw(
|
|
213
|
+
subject=subject,
|
|
214
|
+
predicate=predicate,
|
|
215
|
+
object=obj,
|
|
216
|
+
meta=StandardizedMeta.from_obograph_raw(
|
|
217
|
+
edge.meta, converter, flag=f"{subject.curie} {predicate.curie} {obj.curie}"
|
|
218
|
+
),
|
|
163
219
|
)
|
|
164
220
|
|
|
165
221
|
|
|
@@ -178,33 +234,61 @@ class StandardizedGraph(BaseModel):
|
|
|
178
234
|
"""Instantiate by standardizing a raw OBO Graph object."""
|
|
179
235
|
return cls(
|
|
180
236
|
id=graph.id,
|
|
181
|
-
meta=StandardizedMeta.from_obograph_raw(graph.meta, converter),
|
|
182
|
-
nodes=[
|
|
183
|
-
|
|
237
|
+
meta=StandardizedMeta.from_obograph_raw(graph.meta, converter, flag=graph.id or ""),
|
|
238
|
+
nodes=[
|
|
239
|
+
s_node
|
|
240
|
+
for node in graph.nodes
|
|
241
|
+
if (s_node := StandardizedNode.from_obograph_raw(node, converter))
|
|
242
|
+
],
|
|
243
|
+
edges=[
|
|
244
|
+
s_edge
|
|
245
|
+
for edge in graph.edges
|
|
246
|
+
if (s_edge := StandardizedEdge.from_obograph_raw(edge, converter))
|
|
247
|
+
],
|
|
184
248
|
)
|
|
185
249
|
|
|
250
|
+
def _get_property(self, predicate: Reference) -> str | Reference | None:
|
|
251
|
+
if self.meta is None:
|
|
252
|
+
return None
|
|
253
|
+
|
|
254
|
+
for p in self.meta.properties or []:
|
|
255
|
+
if p.predicate == predicate:
|
|
256
|
+
return p.value
|
|
257
|
+
|
|
258
|
+
return None
|
|
259
|
+
|
|
260
|
+
@property
|
|
261
|
+
def name(self) -> str | None:
|
|
262
|
+
"""Look up the name of the graph."""
|
|
263
|
+
r = self._get_property(Reference(prefix="dcterms", identifier="title"))
|
|
264
|
+
if isinstance(r, Reference):
|
|
265
|
+
raise TypeError
|
|
266
|
+
return r
|
|
267
|
+
|
|
186
268
|
|
|
187
|
-
def _parse_list(
|
|
188
|
-
if not
|
|
269
|
+
def _parse_list(curie_or_uris: list[str] | None, converter: Converter) -> list[Reference] | None:
|
|
270
|
+
if not curie_or_uris:
|
|
189
271
|
return None
|
|
190
|
-
return [
|
|
272
|
+
return [
|
|
273
|
+
reference
|
|
274
|
+
for curie_or_uri in curie_or_uris
|
|
275
|
+
if (reference := _curie_or_uri_to_ref(curie_or_uri, converter))
|
|
276
|
+
]
|
|
191
277
|
|
|
192
278
|
|
|
193
279
|
#: defined in https://github.com/geneontology/obographs/blob/6676b10a5cce04707d75b9dd46fa08de70322b0b/obographs-owlapi/src/main/java/org/geneontology/obographs/owlapi/FromOwl.java#L36-L39
|
|
194
280
|
BUILTINS = {
|
|
195
|
-
"is_a":
|
|
196
|
-
"subPropertyOf":
|
|
197
|
-
"type":
|
|
281
|
+
"is_a": vocabulary.is_a,
|
|
282
|
+
"subPropertyOf": vocabulary.subproperty_of,
|
|
283
|
+
"type": vocabulary.rdf_type,
|
|
198
284
|
"inverseOf": Reference(prefix="owl", identifier="inverseOf"),
|
|
199
285
|
}
|
|
200
286
|
|
|
201
287
|
|
|
202
|
-
def _curie_or_uri_to_ref(s: str, converter: Converter) -> Reference:
|
|
288
|
+
def _curie_or_uri_to_ref(s: str, converter: Converter) -> Reference | None:
|
|
203
289
|
if s in BUILTINS:
|
|
204
290
|
return BUILTINS[s]
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
return
|
|
208
|
-
|
|
209
|
-
pass
|
|
210
|
-
raise ValueError(f"can't parse string: {s}")
|
|
291
|
+
reference_tuple = converter.parse(s, strict=False)
|
|
292
|
+
if reference_tuple is not None:
|
|
293
|
+
return reference_tuple.to_pydantic()
|
|
294
|
+
return None
|
obographs/version.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: obographs
|
|
3
|
-
Version: 0.0.
|
|
3
|
+
Version: 0.0.3
|
|
4
4
|
Summary: A python data model for OBO Graphs
|
|
5
5
|
Keywords: snekpack,cookiecutter
|
|
6
6
|
Author: Charles Tapley Hoyt
|
|
@@ -23,7 +23,7 @@ Classifier: Programming Language :: Python :: 3.13
|
|
|
23
23
|
Classifier: Programming Language :: Python :: 3 :: Only
|
|
24
24
|
Classifier: Typing :: Typed
|
|
25
25
|
Requires-Dist: pydantic
|
|
26
|
-
Requires-Dist: curies
|
|
26
|
+
Requires-Dist: curies>=0.10.7
|
|
27
27
|
Requires-Dist: typing-extensions
|
|
28
28
|
Requires-Dist: sphinx>=8 ; extra == 'docs'
|
|
29
29
|
Requires-Dist: sphinx-rtd-theme>=3.0 ; extra == 'docs'
|
|
@@ -97,7 +97,7 @@ graph_raw = obographs.read(url)
|
|
|
97
97
|
The OBO Graph JSON schema uses non-Pythonic names, and it's inherently not aware
|
|
98
98
|
of semantics - it uses a combination of URIs and ad-hoc symbols as identifiers.
|
|
99
99
|
`obographs` implements a standardization workflow that creates new data
|
|
100
|
-
structures with parsed/normalized URIs and symbols that has Pythonic
|
|
100
|
+
structures with parsed/normalized URIs and symbols that has Pythonic names. Use
|
|
101
101
|
it like:
|
|
102
102
|
|
|
103
103
|
```python
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
obographs/version.py,sha256=3709d2674acd39467891048cff0ca525f08edfba747910e241f23b6fb168d2dc,961
|
|
2
|
+
obographs/__init__.py,sha256=0c3d73d035bde44a5375cbc4dffcf314fc89994853777e0048f1307b0fdd53a8,706
|
|
3
|
+
obographs/model.py,sha256=445dbea604eb3d732c691afee71291348a6185750121ac41fcfeb8973b6a219b,7120
|
|
4
|
+
obographs/py.typed,sha256=01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b,1
|
|
5
|
+
obographs/standardized.py,sha256=27955c754e0f077aa8a4c36e9941331e5b8a716bb2cc21295b7ede7917bbc18d,9859
|
|
6
|
+
obographs-0.0.3.dist-info/licenses/LICENSE,sha256=4be0ec343e3bf11fd54321a6b576d5616ebb7d18898f741f63c517209e33bcb2,1076
|
|
7
|
+
obographs-0.0.3.dist-info/WHEEL,sha256=e3765529bb0cc791d07188d72ec6a759d7625ff6d3a5e4b710d25409bae03770,79
|
|
8
|
+
obographs-0.0.3.dist-info/entry_points.txt,sha256=9a9819cedd2186e28d5d42ddce5e3de1417b0db2b07392ff35f9adc7c86a8619,50
|
|
9
|
+
obographs-0.0.3.dist-info/METADATA,sha256=ce84eea275fad376b42901cbd5d93982cae494d0595cea5cbd7ee44748f6631c,13438
|
|
10
|
+
obographs-0.0.3.dist-info/RECORD,,
|
obographs-0.0.2.dist-info/RECORD
DELETED
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
obographs/version.py,sha256=04839f3ad0ad481ebb750f5228278ee8a55b26c17bd61c0a3f2ccc6da50204ae,961
|
|
2
|
-
obographs/__init__.py,sha256=93807a9cc6f4002ac923a84a8c5e1a9b6301c5368cd61a957c9115adfaef1a56,254
|
|
3
|
-
obographs/model.py,sha256=3387390f1a30fe47b22c7c346dfdf2b6a595ccbc7cd8d7d09debf4a13f8f6e6c,6846
|
|
4
|
-
obographs/py.typed,sha256=01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b,1
|
|
5
|
-
obographs/standardized.py,sha256=15a58dcb9af39a01565fdb096389caf2702f94052e933f9368d93caf620022f5,7099
|
|
6
|
-
obographs-0.0.2.dist-info/licenses/LICENSE,sha256=4be0ec343e3bf11fd54321a6b576d5616ebb7d18898f741f63c517209e33bcb2,1076
|
|
7
|
-
obographs-0.0.2.dist-info/WHEEL,sha256=e3765529bb0cc791d07188d72ec6a759d7625ff6d3a5e4b710d25409bae03770,79
|
|
8
|
-
obographs-0.0.2.dist-info/entry_points.txt,sha256=9a9819cedd2186e28d5d42ddce5e3de1417b0db2b07392ff35f9adc7c86a8619,50
|
|
9
|
-
obographs-0.0.2.dist-info/METADATA,sha256=3e577f9dcce8afd6b2b0694935c320b898e5253ba6f0dcf8520b4d1a996ef3a3,13429
|
|
10
|
-
obographs-0.0.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|