obographs 0.0.1__py3-none-any.whl → 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
obographs/model.py CHANGED
@@ -13,10 +13,15 @@ import json
13
13
  import logging
14
14
  from collections import defaultdict
15
15
  from pathlib import Path
16
- from typing import Any, Literal, TypeAlias, overload
16
+ from typing import TYPE_CHECKING, Any, Literal, TypeAlias, overload
17
17
 
18
18
  from pydantic import BaseModel, Field
19
19
 
20
+ if TYPE_CHECKING:
21
+ import curies
22
+
23
+ from .standardized import StandardizedGraph
24
+
20
25
  __all__ = [
21
26
  "Definition",
22
27
  "Edge",
@@ -58,10 +63,10 @@ OBO_SYNONYM_TO_OIO: dict[str, SynonymPredicate] = {
58
63
  class Property(BaseModel):
59
64
  """Represent a property inside a metadata element."""
60
65
 
61
- pred: str = Field(...)
62
- val: str = Field(
63
- ...,
64
- )
66
+ pred: str
67
+ val: str
68
+ xrefs: list[str] | None = None
69
+ meta: Meta | None = None
65
70
 
66
71
 
67
72
  class Definition(BaseModel):
@@ -74,7 +79,7 @@ class Definition(BaseModel):
74
79
  class Xref(BaseModel):
75
80
  """Represents a cross-reference."""
76
81
 
77
- val: str = Field(...)
82
+ val: str
78
83
 
79
84
 
80
85
  class Synonym(BaseModel):
@@ -87,6 +92,7 @@ class Synonym(BaseModel):
87
92
  default_factory=list,
88
93
  description="A list of CURIEs/IRIs for provenance for the synonym",
89
94
  )
95
+ meta: Meta | None = None
90
96
 
91
97
 
92
98
  class Meta(BaseModel):
@@ -132,11 +138,18 @@ class Graph(BaseModel):
132
138
  domainRangeAxioms: list[Any] = Field(default_factory=list) # noqa:N815
133
139
  propertyChainAxioms: list[Any] = Field(default_factory=list) # noqa:N815
134
140
 
141
def standardize(self, converter: curies.Converter) -> StandardizedGraph:
    """Convert this raw graph into its standardized counterpart.

    :param converter: Converter passed through unchanged to
        ``StandardizedGraph.from_obograph_raw``.
    :returns: Whatever ``StandardizedGraph.from_obograph_raw(self, converter)``
        produces.
    """
    # Imported at call time: at module level this name is only imported
    # under ``TYPE_CHECKING``, so it does not exist at runtime otherwise.
    from . import standardized

    return standardized.StandardizedGraph.from_obograph_raw(self, converter)
146
+
135
147
 
136
148
  class GraphDocument(BaseModel):
137
149
  """Represents a list of OBO graphs."""
138
150
 
139
151
  graphs: list[Graph]
152
+ meta: Meta | None = None
140
153
 
141
154
 
142
155
  def get_id_to_node(graph: Graph) -> dict[str, Node]:
@@ -0,0 +1,210 @@
1
+ """Standardize an OBO graph."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from curies import Converter, Reference
6
+ from pydantic import BaseModel, Field
7
+ from typing_extensions import Self
8
+
9
+ from obographs.model import Definition, Edge, Graph, Meta, Node, NodeType, Property, Synonym, Xref
10
+
11
# Public API of this module. Every non-underscore ``Standardized*`` model is
# listed so that star-imports and documentation tooling see all of them.
__all__ = [
    "StandardizedDefinition",
    "StandardizedEdge",
    "StandardizedGraph",
    "StandardizedMeta",
    "StandardizedNode",
    "StandardizedProperty",
    "StandardizedSynonym",
    "StandardizedXref",
]
19
+
20
+
21
class StandardizedProperty(BaseModel):
    """A property whose predicate and value are parsed into references."""

    predicate: Reference
    value: Reference
    xrefs: list[Reference] | None = None
    meta: StandardizedMeta | None = None

    @classmethod
    def from_obograph_raw(cls, prop: Property, converter: Converter) -> Self:
        """Instantiate by standardizing a raw OBO Graph property.

        :param prop: The raw property to standardize.
        :param converter: Converter used to turn URI/CURIE strings into
            references.
        :returns: A standardized property carrying the parsed predicate and
            value, plus the property's cross-references and nested metadata.
        :raises ValueError: Propagated from ``_curie_or_uri_to_ref`` when a
            string cannot be parsed.
        """
        return cls(
            predicate=_curie_or_uri_to_ref(prop.pred, converter),
            value=_curie_or_uri_to_ref(prop.val, converter),
            # The raw property carries optional xrefs and meta; pass them on
            # instead of silently dropping them.
            xrefs=_parse_list(prop.xrefs, converter),
            meta=StandardizedMeta.from_obograph_raw(prop.meta, converter),
        )
36
+
37
+
38
class StandardizedDefinition(BaseModel):
    """A definition whose provenance is expressed as parsed references."""

    value: str | None = None
    xrefs: list[Reference] | None = None

    @classmethod
    def from_obograph_raw(cls, definition: Definition | None, converter: Converter) -> Self | None:
        """Build a standardized definition from a raw one.

        :param definition: The raw definition, or ``None``.
        :param converter: Converter used to parse the cross-reference strings.
        :returns: ``None`` when no definition is given, otherwise a new
            instance holding the definition text and its parsed xrefs.
        """
        if definition is not None:
            references = _parse_list(definition.xrefs, converter)
            return cls(value=definition.val, xrefs=references)
        return None
53
+
54
+
55
class StandardizedXref(BaseModel):
    """A database cross-reference stored as a parsed reference."""

    reference: Reference

    @classmethod
    def from_obograph_raw(cls, xref: Xref, converter: Converter) -> Self:
        """Build a standardized cross-reference from a raw one.

        :param xref: The raw cross-reference; its ``val`` string is parsed.
        :param converter: Converter used to parse the string.
        :returns: A new instance wrapping the parsed reference.
        """
        parsed = _curie_or_uri_to_ref(xref.val, converter)
        return cls(reference=parsed)
64
+
65
+
66
class StandardizedSynonym(BaseModel):
    """A synonym with its predicate, type and provenance as references."""

    text: str
    predicate: Reference
    type: Reference | None = None
    xrefs: list[Reference] | None = None

    @classmethod
    def from_obograph_raw(cls, synonym: Synonym, converter: Converter) -> Self:
        """Instantiate by standardizing a raw OBO Graph synonym.

        :param synonym: The raw synonym.
        :param converter: Converter used to parse the synonym type and the
            cross-reference strings.
        :returns: A standardized synonym.
        :raises ValueError: Propagated from ``_curie_or_uri_to_ref`` when a
            string cannot be parsed.
        """
        # Explicit conditional rather than ``x and f(x)``: the short-circuit
        # form would forward a falsy non-None value (e.g. an empty string)
        # into a field typed ``Reference | None``.
        synonym_type = (
            _curie_or_uri_to_ref(synonym.synonymType, converter)
            if synonym.synonymType
            else None
        )
        return cls(
            text=synonym.val,
            predicate=Reference(prefix="oboInOwl", identifier=synonym.pred),
            type=synonym_type,
            xrefs=_parse_list(synonym.xrefs, converter),
        )
83
+
84
+
85
class StandardizedMeta(BaseModel):
    """A standardized meta object."""

    # Defaults to ``None`` like every other optional field on this model, so
    # the model can be built without spelling out an absent definition.
    definition: StandardizedDefinition | None = None
    subsets: list[Reference] | None = None
    xrefs: list[StandardizedXref] | None = None
    synonyms: list[StandardizedSynonym] | None = None
    comments: list[str] | None = None
    deprecated: bool = False
    version: str | None = None
    properties: list[StandardizedProperty] | None = None

    @classmethod
    def from_obograph_raw(cls, meta: Meta | None, converter: Converter) -> Self | None:
        """Instantiate by standardizing a raw OBO Graph meta object.

        :param meta: The raw meta object, or ``None``.
        :param converter: Converter used to parse URI/CURIE strings.
        :returns: ``None`` when ``meta`` is ``None``; otherwise a standardized
            meta object. Empty or missing list-valued inputs become ``None``.
        :raises ValueError: Propagated from ``_curie_or_uri_to_ref`` when a
            string cannot be parsed.
        """
        if meta is None:
            return None
        return cls(
            definition=StandardizedDefinition.from_obograph_raw(meta.definition, converter),
            subsets=_parse_list(meta.subsets, converter),
            xrefs=(
                [StandardizedXref.from_obograph_raw(xref, converter) for xref in meta.xrefs]
                if meta.xrefs
                else None
            ),
            synonyms=(
                [
                    StandardizedSynonym.from_obograph_raw(synonym, converter)
                    for synonym in meta.synonyms
                ]
                if meta.synonyms
                else None
            ),
            comments=meta.comments,
            version=meta.version,
            deprecated=meta.deprecated,
            properties=(
                [
                    StandardizedProperty.from_obograph_raw(p, converter)
                    for p in meta.basicPropertyValues
                ]
                if meta.basicPropertyValues
                else None
            ),
        )
126
+
127
+
128
class StandardizedNode(BaseModel):
    """A node identified by a parsed reference."""

    reference: Reference
    label: str | None = None
    meta: StandardizedMeta | None = None
    type: NodeType = Field(..., description="Type of node")

    @classmethod
    def from_obograph_raw(cls, node: Node, converter: Converter) -> Self:
        """Build a standardized node from a raw one.

        :param node: The raw node; its ``id`` is parsed into a reference and
            its ``lbl``, ``meta`` and ``type`` are carried over.
        :param converter: Converter used to parse URI/CURIE strings.
        :returns: A new standardized node.
        """
        node_reference = _curie_or_uri_to_ref(node.id, converter)
        node_meta = StandardizedMeta.from_obograph_raw(node.meta, converter)
        return cls(
            reference=node_reference,
            label=node.lbl,
            meta=node_meta,
            type=node.type,
        )
145
+
146
+
147
class StandardizedEdge(BaseModel):
    """An edge whose subject, predicate and object are parsed references."""

    subject: Reference
    predicate: Reference
    object: Reference
    meta: StandardizedMeta | None = None

    @classmethod
    def from_obograph_raw(cls, node: Edge, converter: Converter) -> Self:
        """Build a standardized edge from a raw one.

        :param node: The raw edge (the parameter is named ``node`` although it
            is annotated as an ``Edge``).
        :param converter: Converter used to parse URI/CURIE strings.
        :returns: A new standardized edge.
        """
        # Parse the three ends of the triple in subject/predicate/object order.
        subject_ref = _curie_or_uri_to_ref(node.sub, converter)
        predicate_ref = _curie_or_uri_to_ref(node.pred, converter)
        object_ref = _curie_or_uri_to_ref(node.obj, converter)
        edge_meta = StandardizedMeta.from_obograph_raw(node.meta, converter)
        return cls(
            subject=subject_ref,
            predicate=predicate_ref,
            object=object_ref,
            meta=edge_meta,
        )
164
+
165
+
166
class StandardizedGraph(BaseModel):
    """A graph made up of standardized nodes and edges."""

    id: str | None = None
    meta: StandardizedMeta | None = None
    nodes: list[StandardizedNode] = Field(default_factory=list)
    edges: list[StandardizedEdge] = Field(default_factory=list)

    # TODO other bits

    @classmethod
    def from_obograph_raw(cls, graph: Graph, converter: Converter) -> Self:
        """Build a standardized graph from a raw one.

        :param graph: The raw graph; its ``id`` is copied as-is, while its
            ``meta``, ``nodes`` and ``edges`` are standardized one by one.
        :param converter: Converter used to parse URI/CURIE strings.
        :returns: A new standardized graph.
        """
        graph_meta = StandardizedMeta.from_obograph_raw(graph.meta, converter)
        standardized_nodes = [
            StandardizedNode.from_obograph_raw(raw_node, converter) for raw_node in graph.nodes
        ]
        standardized_edges = [
            StandardizedEdge.from_obograph_raw(raw_edge, converter) for raw_edge in graph.edges
        ]
        return cls(
            id=graph.id,
            meta=graph_meta,
            nodes=standardized_nodes,
            edges=standardized_edges,
        )
185
+
186
+
187
+ def _parse_list(ss: list[str] | None, converter: Converter) -> list[Reference] | None:
188
+ if not ss:
189
+ return None
190
+ return [_curie_or_uri_to_ref(x, converter) for x in ss]
191
+
192
+
193
#: defined in https://github.com/geneontology/obographs/blob/6676b10a5cce04707d75b9dd46fa08de70322b0b/obographs-owlapi/src/main/java/org/geneontology/obographs/owlapi/FromOwl.java#L36-L39
BUILTINS: dict[str, Reference] = {
    "is_a": Reference(prefix="rdfs", identifier="subClassOf"),
    "subPropertyOf": Reference(prefix="rdfs", identifier="subPropertyOf"),
    "type": Reference(prefix="rdf", identifier="type"),
    "inverseOf": Reference(prefix="owl", identifier="inverseOf"),
}


def _curie_or_uri_to_ref(s: str, converter: Converter) -> Reference:
    """Turn a builtin symbol, URI, or CURIE into a reference.

    The string is checked in that order: a key of ``BUILTINS`` first, then a
    URI the converter recognizes, then a CURIE the converter recognizes.

    :param s: The string to parse.
    :param converter: Converter used to recognize and parse URIs and CURIEs.
    :returns: The matching reference.
    :raises ValueError: If ``s`` is neither a builtin symbol nor a URI or
        CURIE that the converter recognizes.
    """
    if s in BUILTINS:
        return BUILTINS[s]
    if converter.is_uri(s):
        prefix, identifier = converter.parse_uri(s)
        return Reference(prefix=prefix, identifier=identifier)
    if converter.is_curie(s):
        # Previously this branch did nothing, so every CURIE fell through to
        # the ValueError below. The ``is_curie`` guard means the CURIE is
        # expected to parse here.
        prefix, identifier = converter.parse_curie(s)
        return Reference(prefix=prefix, identifier=identifier)
    raise ValueError(f"can't parse string: {s}")
obographs/version.py CHANGED
@@ -12,7 +12,7 @@ __all__ = [
12
12
  "get_version",
13
13
  ]
14
14
 
15
- VERSION = "0.0.1"
15
+ VERSION = "0.0.2"
16
16
 
17
17
 
18
18
  def get_git_hash() -> str:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: obographs
3
- Version: 0.0.1
3
+ Version: 0.0.2
4
4
  Summary: A python data model for OBO Graphs
5
5
  Keywords: snekpack,cookiecutter
6
6
  Author: Charles Tapley Hoyt
@@ -23,6 +23,8 @@ Classifier: Programming Language :: Python :: 3.13
23
23
  Classifier: Programming Language :: Python :: 3 :: Only
24
24
  Classifier: Typing :: Typed
25
25
  Requires-Dist: pydantic
26
+ Requires-Dist: curies
27
+ Requires-Dist: typing-extensions
26
28
  Requires-Dist: sphinx>=8 ; extra == 'docs'
27
29
  Requires-Dist: sphinx-rtd-theme>=3.0 ; extra == 'docs'
28
30
  Requires-Dist: sphinx-automodapi ; extra == 'docs'
@@ -89,11 +91,25 @@ data model.
89
91
  import obographs
90
92
 
91
93
  url = "https://raw.githubusercontent.com/geneontology/obographs/refs/heads/master/examples/abox.json"
92
- graph_document = obographs.read(url)
94
+ graph_raw = obographs.read(url)
93
95
  ```
94
96
 
95
- Note that the OBO Graph JSON schema uses non-pythonic names. The underlying data
96
- model does not attempt to give better names to the fields.
97
+ The OBO Graph JSON schema uses non-Pythonic names, and it's inherently not aware
98
+ of semantics - it uses a combination of URIs and ad-hoc symbols as identifiers.
99
+ `obographs` implements a standardization workflow that creates new data
100
+ structures with parsed/normalized URIs and symbols that have Pythonic names. Use
101
+ it like:
102
+
103
+ ```python
104
+ import curies
105
+
106
+ converter = curies.get_bioregistry_converter()
107
+
108
+ graph = graph_raw.standardize(converter)
109
+ ```
110
+
111
+ Now, this graph object will have nice Pythonic names and references parsed as
112
+ `curies.Reference` objects.
97
113
 
98
114
  ## 🚀 Installation
99
115
 
@@ -0,0 +1,10 @@
1
+ obographs/version.py,sha256=04839f3ad0ad481ebb750f5228278ee8a55b26c17bd61c0a3f2ccc6da50204ae,961
2
+ obographs/__init__.py,sha256=93807a9cc6f4002ac923a84a8c5e1a9b6301c5368cd61a957c9115adfaef1a56,254
3
+ obographs/model.py,sha256=3387390f1a30fe47b22c7c346dfdf2b6a595ccbc7cd8d7d09debf4a13f8f6e6c,6846
4
+ obographs/py.typed,sha256=01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b,1
5
+ obographs/standardized.py,sha256=15a58dcb9af39a01565fdb096389caf2702f94052e933f9368d93caf620022f5,7099
6
+ obographs-0.0.2.dist-info/licenses/LICENSE,sha256=4be0ec343e3bf11fd54321a6b576d5616ebb7d18898f741f63c517209e33bcb2,1076
7
+ obographs-0.0.2.dist-info/WHEEL,sha256=e3765529bb0cc791d07188d72ec6a759d7625ff6d3a5e4b710d25409bae03770,79
8
+ obographs-0.0.2.dist-info/entry_points.txt,sha256=9a9819cedd2186e28d5d42ddce5e3de1417b0db2b07392ff35f9adc7c86a8619,50
9
+ obographs-0.0.2.dist-info/METADATA,sha256=3e577f9dcce8afd6b2b0694935c320b898e5253ba6f0dcf8520b4d1a996ef3a3,13429
10
+ obographs-0.0.2.dist-info/RECORD,,
@@ -1,9 +0,0 @@
1
- obographs/version.py,sha256=0ce3c0b54472caa26dc5eacd65d21c9f7a242a94f5f9ea4d972cf08c2775018b,961
2
- obographs/__init__.py,sha256=93807a9cc6f4002ac923a84a8c5e1a9b6301c5368cd61a957c9115adfaef1a56,254
3
- obographs/model.py,sha256=2032a2fb41761be360fbae3280ae5db570c12eb528bd3215c9d2e39494934ada,6441
4
- obographs/py.typed,sha256=01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b,1
5
- obographs-0.0.1.dist-info/licenses/LICENSE,sha256=4be0ec343e3bf11fd54321a6b576d5616ebb7d18898f741f63c517209e33bcb2,1076
6
- obographs-0.0.1.dist-info/WHEEL,sha256=e3765529bb0cc791d07188d72ec6a759d7625ff6d3a5e4b710d25409bae03770,79
7
- obographs-0.0.1.dist-info/entry_points.txt,sha256=9a9819cedd2186e28d5d42ddce5e3de1417b0db2b07392ff35f9adc7c86a8619,50
8
- obographs-0.0.1.dist-info/METADATA,sha256=7a7a77f96ddea3e845896d0a37e44ef7a559187f7c01738c77d2fb1a881ffcb9,12972
9
- obographs-0.0.1.dist-info/RECORD,,