obographs 0.0.1__tar.gz → 0.0.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: obographs
3
- Version: 0.0.1
3
+ Version: 0.0.3
4
4
  Summary: A python data model for OBO Graphs
5
5
  Keywords: snekpack,cookiecutter
6
6
  Author: Charles Tapley Hoyt
@@ -23,6 +23,8 @@ Classifier: Programming Language :: Python :: 3.13
23
23
  Classifier: Programming Language :: Python :: 3 :: Only
24
24
  Classifier: Typing :: Typed
25
25
  Requires-Dist: pydantic
26
+ Requires-Dist: curies>=0.10.7
27
+ Requires-Dist: typing-extensions
26
28
  Requires-Dist: sphinx>=8 ; extra == 'docs'
27
29
  Requires-Dist: sphinx-rtd-theme>=3.0 ; extra == 'docs'
28
30
  Requires-Dist: sphinx-automodapi ; extra == 'docs'
@@ -89,11 +91,25 @@ data model.
89
91
  import obographs
90
92
 
91
93
  url = "https://raw.githubusercontent.com/geneontology/obographs/refs/heads/master/examples/abox.json"
92
- graph_document = obographs.read(url)
94
+ graph_raw = obographs.read(url)
93
95
  ```
94
96
 
95
- Note that the OBO Graph JSON schema uses non-pythonic names. The underlying data
96
- model does not attempt to give better names to the fields.
97
+ The OBO Graph JSON schema uses non-Pythonic names, and it's inherently not aware
98
+ of semantics - it uses a combination of URIs and ad-hoc symbols as identifiers.
99
+ `obographs` implements a standardization workflow that creates new data
100
+ structures with parsed/normalized URIs and symbols that have Pythonic names. Use
101
+ it like:
102
+
103
+ ```python
104
+ import curies
105
+
106
+ converter = curies.get_bioregistry_converter()
107
+
108
+ graph = graph_raw.standardize(converter)
109
+ ```
110
+
111
+ Now, this graph object will have nice Pythonic names and references parsed as
112
+ `curies.Reference` objects.
97
113
 
98
114
  ## 🚀 Installation
99
115
 
@@ -44,11 +44,25 @@ data model.
44
44
  import obographs
45
45
 
46
46
  url = "https://raw.githubusercontent.com/geneontology/obographs/refs/heads/master/examples/abox.json"
47
- graph_document = obographs.read(url)
47
+ graph_raw = obographs.read(url)
48
48
  ```
49
49
 
50
- Note that the OBO Graph JSON schema uses non-pythonic names. The underlying data
51
- model does not attempt to give better names to the fields.
50
+ The OBO Graph JSON schema uses non-Pythonic names, and it's inherently not aware
51
+ of semantics - it uses a combination of URIs and ad-hoc symbols as identifiers.
52
+ `obographs` implements a standardization workflow that creates new data
53
+ structures with parsed/normalized URIs and symbols that have Pythonic names. Use
54
+ it like:
55
+
56
+ ```python
57
+ import curies
58
+
59
+ converter = curies.get_bioregistry_converter()
60
+
61
+ graph = graph_raw.standardize(converter)
62
+ ```
63
+
64
+ Now, this graph object will have nice Pythonic names and references parsed as
65
+ `curies.Reference` objects.
52
66
 
53
67
  ## 🚀 Installation
54
68
 
@@ -6,7 +6,7 @@ build-backend = "uv"
6
6
 
7
7
  [project]
8
8
  name = "obographs"
9
- version = "0.0.1"
9
+ version = "0.0.3"
10
10
  description = "A python data model for OBO Graphs"
11
11
  readme = "README.md"
12
12
  authors = [
@@ -52,6 +52,8 @@ license-files = [
52
52
  requires-python = ">=3.10"
53
53
  dependencies = [
54
54
  "pydantic",
55
+ "curies>=0.10.7",
56
+ "typing-extensions",
55
57
  ]
56
58
 
57
59
  [project.optional-dependencies]
@@ -187,7 +189,7 @@ known-first-party = [
187
189
  docstring-code-format = true
188
190
 
189
191
  [tool.bumpversion]
190
- current_version = "0.0.1"
192
+ current_version = "0.0.3"
191
193
  parse = "(?P<major>\\d+)\\.(?P<minor>\\d+)\\.(?P<patch>\\d+)(?:-(?P<release>[0-9A-Za-z-]+(?:\\.[0-9A-Za-z-]+)*))?(?:\\+(?P<build>[0-9A-Za-z-]+(?:\\.[0-9A-Za-z-]+)*))?"
192
194
  serialize = [
193
195
  "{major}.{minor}.{patch}-{release}+{build}",
@@ -0,0 +1,33 @@
1
+ """A python data model for OBO Graphs."""
2
+
3
+ from .model import Graph, GraphDocument, Meta, Node, NodeType, Property, Synonym, Xref, read
4
+ from .standardized import (
5
+ StandardizedDefinition,
6
+ StandardizedEdge,
7
+ StandardizedGraph,
8
+ StandardizedMeta,
9
+ StandardizedNode,
10
+ StandardizedProperty,
11
+ StandardizedSynonym,
12
+ StandardizedXref,
13
+ )
14
+
15
+ __all__ = [
16
+ "Graph",
17
+ "GraphDocument",
18
+ "Meta",
19
+ "Node",
20
+ "NodeType",
21
+ "Property",
22
+ "StandardizedDefinition",
23
+ "StandardizedEdge",
24
+ "StandardizedGraph",
25
+ "StandardizedMeta",
26
+ "StandardizedNode",
27
+ "StandardizedProperty",
28
+ "StandardizedSynonym",
29
+ "StandardizedXref",
30
+ "Synonym",
31
+ "Xref",
32
+ "read",
33
+ ]
@@ -13,10 +13,15 @@ import json
13
13
  import logging
14
14
  from collections import defaultdict
15
15
  from pathlib import Path
16
- from typing import Any, Literal, TypeAlias, overload
16
+ from typing import TYPE_CHECKING, Any, Literal, TypeAlias, overload
17
17
 
18
18
  from pydantic import BaseModel, Field
19
19
 
20
+ if TYPE_CHECKING:
21
+ import curies
22
+
23
+ from .standardized import StandardizedGraph
24
+
20
25
  __all__ = [
21
26
  "Definition",
22
27
  "Edge",
@@ -58,10 +63,15 @@ OBO_SYNONYM_TO_OIO: dict[str, SynonymPredicate] = {
58
63
  class Property(BaseModel):
59
64
  """Represent a property inside a metadata element."""
60
65
 
61
- pred: str = Field(...)
62
- val: str = Field(
63
- ...,
66
+ pred: str
67
+ val: str | None = Field(
68
+ None,
69
+ description="Stores the value of the property. This can be a string representing a "
70
+ "literal or IRI. This isn't supposed to be nullable, but it happens a lot - might be a "
71
+ "bug in OWLAPI or ROBOT",
64
72
  )
73
+ xrefs: list[str] | None = None
74
+ meta: Meta | None = None
65
75
 
66
76
 
67
77
  class Definition(BaseModel):
@@ -74,7 +84,7 @@ class Definition(BaseModel):
74
84
  class Xref(BaseModel):
75
85
  """Represents a cross-reference."""
76
86
 
77
- val: str = Field(...)
87
+ val: str
78
88
 
79
89
 
80
90
  class Synonym(BaseModel):
@@ -82,11 +92,12 @@ class Synonym(BaseModel):
82
92
 
83
93
  val: str | None = Field(default=None)
84
94
  pred: str = Field(default="hasExactSynonym")
85
- synonymType: str | None = Field(examples=["OMO:0003000"]) # noqa:N815
95
+ synonymType: str | None = Field(None, examples=["OMO:0003000"]) # noqa:N815
86
96
  xrefs: list[str] = Field(
87
97
  default_factory=list,
88
98
  description="A list of CURIEs/IRIs for provenance for the synonym",
89
99
  )
100
+ meta: Meta | None = None
90
101
 
91
102
 
92
103
  class Meta(BaseModel):
@@ -117,7 +128,7 @@ class Node(BaseModel):
117
128
  id: str = Field(..., description="The IRI for the node")
118
129
  lbl: str | None = Field(None, description="The name of the node")
119
130
  meta: Meta | None = None
120
- type: NodeType = Field(..., description="Type of node")
131
+ type: NodeType | None = Field(None, description="Type of node")
121
132
 
122
133
 
123
134
  class Graph(BaseModel):
@@ -132,11 +143,18 @@ class Graph(BaseModel):
132
143
  domainRangeAxioms: list[Any] = Field(default_factory=list) # noqa:N815
133
144
  propertyChainAxioms: list[Any] = Field(default_factory=list) # noqa:N815
134
145
 
146
+ def standardize(self, converter: curies.Converter) -> StandardizedGraph:
147
+ """Standardize the graph."""
148
+ from .standardized import StandardizedGraph
149
+
150
+ return StandardizedGraph.from_obograph_raw(self, converter)
151
+
135
152
 
136
153
  class GraphDocument(BaseModel):
137
154
  """Represents a list of OBO graphs."""
138
155
 
139
156
  graphs: list[Graph]
157
+ meta: Meta | None = None
140
158
 
141
159
 
142
160
  def get_id_to_node(graph: Graph) -> dict[str, Node]:
@@ -0,0 +1,294 @@
1
+ """Standardize an OBO graph."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+
7
+ from curies import Converter, Reference, vocabulary
8
+ from pydantic import BaseModel, Field
9
+ from typing_extensions import Self
10
+
11
+ from obographs.model import Definition, Edge, Graph, Meta, Node, NodeType, Property, Synonym, Xref
12
+
13
+ __all__ = [
14
+ "StandardizedDefinition",
15
+ "StandardizedEdge",
16
+ "StandardizedGraph",
17
+ "StandardizedMeta",
18
+ "StandardizedNode",
19
+ "StandardizedProperty",
20
+ "StandardizedSynonym",
21
+ "StandardizedXref",
22
+ ]
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+
27
+ class StandardizedProperty(BaseModel):
28
+ """A standardized property."""
29
+
30
+ predicate: Reference
31
+ value: Reference | str = Field(
32
+ ..., description="Parsed into a Reference if a CURIE or IRI, or a string if it's a literal"
33
+ )
34
+ xrefs: list[Reference] | None = None
35
+ meta: StandardizedMeta | None = None
36
+
37
+ @classmethod
38
+ def from_obograph_raw(cls, prop: Property, converter: Converter) -> Self:
39
+ """Instantiate by standardizing a raw OBO Graph object."""
40
+ if not prop.val or not prop.pred:
41
+ raise ValueError
42
+ value: Reference | str | None
43
+ if not prop.val.startswith("http://") and not prop.val.startswith("https"):
44
+ value = _curie_or_uri_to_ref(prop.val, converter)
45
+ else:
46
+ value = prop.val
47
+ if value is None:
48
+ raise ValueError
49
+ return cls(
50
+ predicate=_curie_or_uri_to_ref(prop.pred, converter),
51
+ value=value,
52
+ )
53
+
54
+
55
+ class StandardizedDefinition(BaseModel):
56
+ """A standardized definition."""
57
+
58
+ value: str | None = Field(default=None)
59
+ xrefs: list[Reference] | None = Field(default=None)
60
+
61
+ @classmethod
62
+ def from_obograph_raw(cls, definition: Definition | None, converter: Converter) -> Self | None:
63
+ """Instantiate by standardizing a raw OBO Graph object."""
64
+ if definition is None:
65
+ return None
66
+ return cls(
67
+ value=definition.val,
68
+ xrefs=_parse_list(definition.xrefs, converter),
69
+ )
70
+
71
+
72
+ class StandardizedXref(BaseModel):
73
+ """A standardized database cross-reference."""
74
+
75
+ reference: Reference
76
+
77
+ @classmethod
78
+ def from_obograph_raw(cls, xref: Xref, converter: Converter) -> Self:
79
+ """Instantiate by standardizing a raw OBO Graph object."""
80
+ return cls(reference=_curie_or_uri_to_ref(xref.val, converter))
81
+
82
+
83
+ class StandardizedSynonym(BaseModel):
84
+ """A standardized synonym."""
85
+
86
+ text: str
87
+ predicate: Reference
88
+ type: Reference | None = None
89
+ xrefs: list[Reference] | None = None
90
+
91
+ @classmethod
92
+ def from_obograph_raw(cls, synonym: Synonym, converter: Converter) -> Self:
93
+ """Instantiate by standardizing a raw OBO Graph object."""
94
+ return cls(
95
+ text=synonym.val,
96
+ predicate=Reference(prefix="oboInOwl", identifier=synonym.pred),
97
+ type=synonym.synonymType and _curie_or_uri_to_ref(synonym.synonymType, converter),
98
+ xrefs=_parse_list(synonym.xrefs, converter),
99
+ )
100
+
101
+
102
+ class StandardizedMeta(BaseModel):
103
+ """A standardized meta object."""
104
+
105
+ definition: StandardizedDefinition | None
106
+ subsets: list[Reference] | None = None
107
+ xrefs: list[StandardizedXref] | None = None
108
+ synonyms: list[StandardizedSynonym] | None = None
109
+ comments: list[str] | None = None
110
+ deprecated: bool = False
111
+ version: str | None = None
112
+ properties: list[StandardizedProperty] | None = None
113
+
114
+ @classmethod
115
+ def from_obograph_raw( # noqa:C901
116
+ cls, meta: Meta | None, converter: Converter, flag: str = ""
117
+ ) -> Self | None:
118
+ """Instantiate by standardizing a raw OBO Graph object."""
119
+ if meta is None:
120
+ return None
121
+
122
+ xrefs = []
123
+ for raw_xref in meta.xrefs or []:
124
+ if raw_xref.val:
125
+ try:
126
+ st_xref = StandardizedXref.from_obograph_raw(raw_xref, converter)
127
+ except ValueError:
128
+ logger.debug("[%s] failed to standardize xref: %s", flag, raw_xref)
129
+ else:
130
+ xrefs.append(st_xref)
131
+
132
+ synonyms = []
133
+ for raw_synonym in meta.synonyms or []:
134
+ if raw_synonym.val:
135
+ try:
136
+ s = StandardizedSynonym.from_obograph_raw(raw_synonym, converter)
137
+ except ValueError:
138
+ logger.debug("[%s] failed to standardize synonym: %s", flag, raw_synonym)
139
+ else:
140
+ synonyms.append(s)
141
+
142
+ props = []
143
+ for raw_prop in meta.basicPropertyValues or []:
144
+ if raw_prop.val and raw_prop.pred:
145
+ try:
146
+ prop = StandardizedProperty.from_obograph_raw(raw_prop, converter)
147
+ except ValueError:
148
+ logger.debug("[%s] failed to standardize property: %s", flag, raw_prop)
149
+ else:
150
+ props.append(prop)
151
+
152
+ return cls(
153
+ definition=StandardizedDefinition.from_obograph_raw(meta.definition, converter),
154
+ subsets=[_curie_or_uri_to_ref(subset, converter) for subset in meta.subsets]
155
+ if meta.subsets
156
+ else None,
157
+ xrefs=xrefs or None,
158
+ synonyms=synonyms or None,
159
+ comments=meta.comments,
160
+ version=meta.version,
161
+ deprecated=meta.deprecated,
162
+ properties=props or None,
163
+ )
164
+
165
+
166
+ class StandardizedNode(BaseModel):
167
+ """A standardized node."""
168
+
169
+ reference: Reference
170
+ label: str | None = Field(None)
171
+ meta: StandardizedMeta | None = None
172
+ type: NodeType | None = Field(None, description="Type of node")
173
+
174
+ @classmethod
175
+ def from_obograph_raw(cls, node: Node, converter: Converter) -> Self | None:
176
+ """Instantiate by standardizing a raw OBO Graph object."""
177
+ reference = _curie_or_uri_to_ref(node.id, converter)
178
+ if reference is None:
179
+ logger.warning("failed to parse node's ID %s", node.id)
180
+ return None
181
+ return cls(
182
+ reference=reference,
183
+ label=node.lbl,
184
+ meta=StandardizedMeta.from_obograph_raw(node.meta, converter, flag=reference.curie),
185
+ type=node.type,
186
+ )
187
+
188
+
189
+ class StandardizedEdge(BaseModel):
190
+ """A standardized edge."""
191
+
192
+ subject: Reference
193
+ predicate: Reference
194
+ object: Reference
195
+ meta: StandardizedMeta | None = None
196
+
197
+ @classmethod
198
+ def from_obograph_raw(cls, edge: Edge, converter: Converter) -> Self | None:
199
+ """Instantiate by standardizing a raw OBO Graph object."""
200
+ subject = _curie_or_uri_to_ref(edge.sub, converter)
201
+ if not subject:
202
+ logger.warning("failed to parse edge's subject %s", edge.sub)
203
+ return None
204
+ predicate = _curie_or_uri_to_ref(edge.pred, converter)
205
+ if not predicate:
206
+ logger.warning("failed to parse edge's predicate %s", edge.pred)
207
+ return None
208
+ obj = _curie_or_uri_to_ref(edge.obj, converter)
209
+ if not obj:
210
+ logger.warning("failed to parse edge's object %s", edge.obj)
211
+ return None
212
+ return cls(
213
+ subject=subject,
214
+ predicate=predicate,
215
+ object=obj,
216
+ meta=StandardizedMeta.from_obograph_raw(
217
+ edge.meta, converter, flag=f"{subject.curie} {predicate.curie} {obj.curie}"
218
+ ),
219
+ )
220
+
221
+
222
+ class StandardizedGraph(BaseModel):
223
+ """A standardized graph."""
224
+
225
+ id: str | None = None
226
+ meta: StandardizedMeta | None = None
227
+ nodes: list[StandardizedNode] = Field(default_factory=list)
228
+ edges: list[StandardizedEdge] = Field(default_factory=list)
229
+
230
+ # TODO other bits
231
+
232
+ @classmethod
233
+ def from_obograph_raw(cls, graph: Graph, converter: Converter) -> Self:
234
+ """Instantiate by standardizing a raw OBO Graph object."""
235
+ return cls(
236
+ id=graph.id,
237
+ meta=StandardizedMeta.from_obograph_raw(graph.meta, converter, flag=graph.id or ""),
238
+ nodes=[
239
+ s_node
240
+ for node in graph.nodes
241
+ if (s_node := StandardizedNode.from_obograph_raw(node, converter))
242
+ ],
243
+ edges=[
244
+ s_edge
245
+ for edge in graph.edges
246
+ if (s_edge := StandardizedEdge.from_obograph_raw(edge, converter))
247
+ ],
248
+ )
249
+
250
+ def _get_property(self, predicate: Reference) -> str | Reference | None:
251
+ if self.meta is None:
252
+ return None
253
+
254
+ for p in self.meta.properties or []:
255
+ if p.predicate == predicate:
256
+ return p.value
257
+
258
+ return None
259
+
260
+ @property
261
+ def name(self) -> str | None:
262
+ """Look up the name of the graph."""
263
+ r = self._get_property(Reference(prefix="dcterms", identifier="title"))
264
+ if isinstance(r, Reference):
265
+ raise TypeError
266
+ return r
267
+
268
+
269
+ def _parse_list(curie_or_uris: list[str] | None, converter: Converter) -> list[Reference] | None:
270
+ if not curie_or_uris:
271
+ return None
272
+ return [
273
+ reference
274
+ for curie_or_uri in curie_or_uris
275
+ if (reference := _curie_or_uri_to_ref(curie_or_uri, converter))
276
+ ]
277
+
278
+
279
+ #: defined in https://github.com/geneontology/obographs/blob/6676b10a5cce04707d75b9dd46fa08de70322b0b/obographs-owlapi/src/main/java/org/geneontology/obographs/owlapi/FromOwl.java#L36-L39
280
+ BUILTINS = {
281
+ "is_a": vocabulary.is_a,
282
+ "subPropertyOf": vocabulary.subproperty_of,
283
+ "type": vocabulary.rdf_type,
284
+ "inverseOf": Reference(prefix="owl", identifier="inverseOf"),
285
+ }
286
+
287
+
288
+ def _curie_or_uri_to_ref(s: str, converter: Converter) -> Reference | None:
289
+ if s in BUILTINS:
290
+ return BUILTINS[s]
291
+ reference_tuple = converter.parse(s, strict=False)
292
+ if reference_tuple is not None:
293
+ return reference_tuple.to_pydantic()
294
+ return None
@@ -12,7 +12,7 @@ __all__ = [
12
12
  "get_version",
13
13
  ]
14
14
 
15
- VERSION = "0.0.1"
15
+ VERSION = "0.0.3"
16
16
 
17
17
 
18
18
  def get_git_hash() -> str:
@@ -1,14 +0,0 @@
1
- """A python data model for OBO Graphs."""
2
-
3
- from .model import Graph, GraphDocument, Meta, Node, Property, Synonym, Xref, read
4
-
5
- __all__ = [
6
- "Graph",
7
- "GraphDocument",
8
- "Meta",
9
- "Node",
10
- "Property",
11
- "Synonym",
12
- "Xref",
13
- "read",
14
- ]
File without changes