obographs 0.0.2__py3-none-any.whl → 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
obographs/__init__.py CHANGED
@@ -1,13 +1,32 @@
1
1
  """A python data model for OBO Graphs."""
2
2
 
3
- from .model import Graph, GraphDocument, Meta, Node, Property, Synonym, Xref, read
3
+ from .model import Graph, GraphDocument, Meta, Node, NodeType, Property, Synonym, Xref, read
4
+ from .standardized import (
5
+ StandardizedDefinition,
6
+ StandardizedEdge,
7
+ StandardizedGraph,
8
+ StandardizedMeta,
9
+ StandardizedNode,
10
+ StandardizedProperty,
11
+ StandardizedSynonym,
12
+ StandardizedXref,
13
+ )
4
14
 
5
15
  __all__ = [
6
16
  "Graph",
7
17
  "GraphDocument",
8
18
  "Meta",
9
19
  "Node",
20
+ "NodeType",
10
21
  "Property",
22
+ "StandardizedDefinition",
23
+ "StandardizedEdge",
24
+ "StandardizedGraph",
25
+ "StandardizedMeta",
26
+ "StandardizedNode",
27
+ "StandardizedProperty",
28
+ "StandardizedSynonym",
29
+ "StandardizedXref",
11
30
  "Synonym",
12
31
  "Xref",
13
32
  "read",
obographs/model.py CHANGED
@@ -64,7 +64,12 @@ class Property(BaseModel):
64
64
  """Represent a property inside a metadata element."""
65
65
 
66
66
  pred: str
67
- val: str
67
+ val: str | None = Field(
68
+ None,
69
+ description="Stores the value of the property. This can be a string representing a "
70
+ "literal or IRI. This isn't supposed to be nullable, but it happens a lot - might be a "
71
+ "bug in OWLAPI or ROBOT",
72
+ )
68
73
  xrefs: list[str] | None = None
69
74
  meta: Meta | None = None
70
75
 
@@ -87,7 +92,7 @@ class Synonym(BaseModel):
87
92
 
88
93
  val: str | None = Field(default=None)
89
94
  pred: str = Field(default="hasExactSynonym")
90
- synonymType: str | None = Field(examples=["OMO:0003000"]) # noqa:N815
95
+ synonymType: str | None = Field(None, examples=["OMO:0003000"]) # noqa:N815
91
96
  xrefs: list[str] = Field(
92
97
  default_factory=list,
93
98
  description="A list of CURIEs/IRIs for provenance for the synonym",
@@ -123,7 +128,7 @@ class Node(BaseModel):
123
128
  id: str = Field(..., description="The IRI for the node")
124
129
  lbl: str | None = Field(None, description="The name of the node")
125
130
  meta: Meta | None = None
126
- type: NodeType = Field(..., description="Type of node")
131
+ type: NodeType | None = Field(None, description="Type of node")
127
132
 
128
133
 
129
134
  class Graph(BaseModel):
obographs/standardized.py CHANGED
@@ -2,7 +2,9 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- from curies import Converter, Reference
5
+ import logging
6
+
7
+ from curies import Converter, Reference, vocabulary
6
8
  from pydantic import BaseModel, Field
7
9
  from typing_extensions import Self
8
10
 
@@ -14,24 +16,39 @@ __all__ = [
14
16
  "StandardizedGraph",
15
17
  "StandardizedMeta",
16
18
  "StandardizedNode",
19
+ "StandardizedProperty",
20
+ "StandardizedSynonym",
17
21
  "StandardizedXref",
18
22
  ]
19
23
 
24
+ logger = logging.getLogger(__name__)
25
+
20
26
 
21
27
  class StandardizedProperty(BaseModel):
22
28
  """A standardized property."""
23
29
 
24
30
  predicate: Reference
25
- value: Reference
31
+ value: Reference | str = Field(
32
+ ..., description="Parsed into a Reference if a CURIE or IRI, or a string if it's a literal"
33
+ )
26
34
  xrefs: list[Reference] | None = None
27
35
  meta: StandardizedMeta | None = None
28
36
 
29
37
  @classmethod
30
38
  def from_obograph_raw(cls, prop: Property, converter: Converter) -> Self:
31
39
  """Instantiate by standardizing a raw OBO Graph object."""
40
+ if not prop.val or not prop.pred:
41
+ raise ValueError
42
+ value: Reference | str | None
43
+ if not prop.val.startswith("http://") and not prop.val.startswith("https"):
44
+ value = _curie_or_uri_to_ref(prop.val, converter)
45
+ else:
46
+ value = prop.val
47
+ if value is None:
48
+ raise ValueError
32
49
  return cls(
33
50
  predicate=_curie_or_uri_to_ref(prop.pred, converter),
34
- value=_curie_or_uri_to_ref(prop.val, converter),
51
+ value=value,
35
52
  )
36
53
 
37
54
 
@@ -95,33 +112,54 @@ class StandardizedMeta(BaseModel):
95
112
  properties: list[StandardizedProperty] | None = None
96
113
 
97
114
  @classmethod
98
- def from_obograph_raw(cls, meta: Meta | None, converter: Converter) -> Self | None:
115
+ def from_obograph_raw( # noqa:C901
116
+ cls, meta: Meta | None, converter: Converter, flag: str = ""
117
+ ) -> Self | None:
99
118
  """Instantiate by standardizing a raw OBO Graph object."""
100
119
  if meta is None:
101
120
  return None
121
+
122
+ xrefs = []
123
+ for raw_xref in meta.xrefs or []:
124
+ if raw_xref.val:
125
+ try:
126
+ st_xref = StandardizedXref.from_obograph_raw(raw_xref, converter)
127
+ except ValueError:
128
+ logger.debug("[%s] failed to standardize xref: %s", flag, raw_xref)
129
+ else:
130
+ xrefs.append(st_xref)
131
+
132
+ synonyms = []
133
+ for raw_synonym in meta.synonyms or []:
134
+ if raw_synonym.val:
135
+ try:
136
+ s = StandardizedSynonym.from_obograph_raw(raw_synonym, converter)
137
+ except ValueError:
138
+ logger.debug("[%s] failed to standardize synonym: %s", flag, raw_synonym)
139
+ else:
140
+ synonyms.append(s)
141
+
142
+ props = []
143
+ for raw_prop in meta.basicPropertyValues or []:
144
+ if raw_prop.val and raw_prop.pred:
145
+ try:
146
+ prop = StandardizedProperty.from_obograph_raw(raw_prop, converter)
147
+ except ValueError:
148
+ logger.debug("[%s] failed to standardize property: %s", flag, raw_prop)
149
+ else:
150
+ props.append(prop)
151
+
102
152
  return cls(
103
153
  definition=StandardizedDefinition.from_obograph_raw(meta.definition, converter),
104
154
  subsets=[_curie_or_uri_to_ref(subset, converter) for subset in meta.subsets]
105
155
  if meta.subsets
106
156
  else None,
107
- xrefs=[StandardizedXref.from_obograph_raw(xref, converter) for xref in meta.xrefs]
108
- if meta.xrefs
109
- else None,
110
- synonyms=[
111
- StandardizedSynonym.from_obograph_raw(synonym, converter)
112
- for synonym in meta.synonyms
113
- ]
114
- if meta.synonyms
115
- else None,
157
+ xrefs=xrefs or None,
158
+ synonyms=synonyms or None,
116
159
  comments=meta.comments,
117
160
  version=meta.version,
118
161
  deprecated=meta.deprecated,
119
- properties=[
120
- StandardizedProperty.from_obograph_raw(p, converter)
121
- for p in meta.basicPropertyValues
122
- ]
123
- if meta.basicPropertyValues
124
- else None,
162
+ properties=props or None,
125
163
  )
126
164
 
127
165
 
@@ -131,15 +169,19 @@ class StandardizedNode(BaseModel):
131
169
  reference: Reference
132
170
  label: str | None = Field(None)
133
171
  meta: StandardizedMeta | None = None
134
- type: NodeType = Field(..., description="Type of node")
172
+ type: NodeType | None = Field(None, description="Type of node")
135
173
 
136
174
  @classmethod
137
- def from_obograph_raw(cls, node: Node, converter: Converter) -> Self:
175
+ def from_obograph_raw(cls, node: Node, converter: Converter) -> Self | None:
138
176
  """Instantiate by standardizing a raw OBO Graph object."""
177
+ reference = _curie_or_uri_to_ref(node.id, converter)
178
+ if reference is None:
179
+ logger.warning("failed to parse node's ID %s", node.id)
180
+ return None
139
181
  return cls(
140
- reference=_curie_or_uri_to_ref(node.id, converter),
182
+ reference=reference,
141
183
  label=node.lbl,
142
- meta=StandardizedMeta.from_obograph_raw(node.meta, converter),
184
+ meta=StandardizedMeta.from_obograph_raw(node.meta, converter, flag=reference.curie),
143
185
  type=node.type,
144
186
  )
145
187
 
@@ -153,13 +195,27 @@ class StandardizedEdge(BaseModel):
153
195
  meta: StandardizedMeta | None = None
154
196
 
155
197
  @classmethod
156
- def from_obograph_raw(cls, node: Edge, converter: Converter) -> Self:
198
+ def from_obograph_raw(cls, edge: Edge, converter: Converter) -> Self | None:
157
199
  """Instantiate by standardizing a raw OBO Graph object."""
200
+ subject = _curie_or_uri_to_ref(edge.sub, converter)
201
+ if not subject:
202
+ logger.warning("failed to parse edge's subject %s", edge.sub)
203
+ return None
204
+ predicate = _curie_or_uri_to_ref(edge.pred, converter)
205
+ if not predicate:
206
+ logger.warning("failed to parse edge's predicate %s", edge.pred)
207
+ return None
208
+ obj = _curie_or_uri_to_ref(edge.obj, converter)
209
+ if not obj:
210
+ logger.warning("failed to parse edge's object %s", edge.obj)
211
+ return None
158
212
  return cls(
159
- subject=_curie_or_uri_to_ref(node.sub, converter),
160
- predicate=_curie_or_uri_to_ref(node.pred, converter),
161
- object=_curie_or_uri_to_ref(node.obj, converter),
162
- meta=StandardizedMeta.from_obograph_raw(node.meta, converter),
213
+ subject=subject,
214
+ predicate=predicate,
215
+ object=obj,
216
+ meta=StandardizedMeta.from_obograph_raw(
217
+ edge.meta, converter, flag=f"{subject.curie} {predicate.curie} {obj.curie}"
218
+ ),
163
219
  )
164
220
 
165
221
 
@@ -178,33 +234,61 @@ class StandardizedGraph(BaseModel):
178
234
  """Instantiate by standardizing a raw OBO Graph object."""
179
235
  return cls(
180
236
  id=graph.id,
181
- meta=StandardizedMeta.from_obograph_raw(graph.meta, converter),
182
- nodes=[StandardizedNode.from_obograph_raw(node, converter) for node in graph.nodes],
183
- edges=[StandardizedEdge.from_obograph_raw(edge, converter) for edge in graph.edges],
237
+ meta=StandardizedMeta.from_obograph_raw(graph.meta, converter, flag=graph.id or ""),
238
+ nodes=[
239
+ s_node
240
+ for node in graph.nodes
241
+ if (s_node := StandardizedNode.from_obograph_raw(node, converter))
242
+ ],
243
+ edges=[
244
+ s_edge
245
+ for edge in graph.edges
246
+ if (s_edge := StandardizedEdge.from_obograph_raw(edge, converter))
247
+ ],
184
248
  )
185
249
 
250
+ def _get_property(self, predicate: Reference) -> str | Reference | None:
251
+ if self.meta is None:
252
+ return None
253
+
254
+ for p in self.meta.properties or []:
255
+ if p.predicate == predicate:
256
+ return p.value
257
+
258
+ return None
259
+
260
+ @property
261
+ def name(self) -> str | None:
262
+ """Look up the name of the graph."""
263
+ r = self._get_property(Reference(prefix="dcterms", identifier="title"))
264
+ if isinstance(r, Reference):
265
+ raise TypeError
266
+ return r
267
+
186
268
 
187
- def _parse_list(ss: list[str] | None, converter: Converter) -> list[Reference] | None:
188
- if not ss:
269
+ def _parse_list(curie_or_uris: list[str] | None, converter: Converter) -> list[Reference] | None:
270
+ if not curie_or_uris:
189
271
  return None
190
- return [_curie_or_uri_to_ref(x, converter) for x in ss]
272
+ return [
273
+ reference
274
+ for curie_or_uri in curie_or_uris
275
+ if (reference := _curie_or_uri_to_ref(curie_or_uri, converter))
276
+ ]
191
277
 
192
278
 
193
279
  #: defined in https://github.com/geneontology/obographs/blob/6676b10a5cce04707d75b9dd46fa08de70322b0b/obographs-owlapi/src/main/java/org/geneontology/obographs/owlapi/FromOwl.java#L36-L39
194
280
  BUILTINS = {
195
- "is_a": Reference(prefix="rdfs", identifier="subClassOf"),
196
- "subPropertyOf": Reference(prefix="rdfs", identifier="subPropertyOf"),
197
- "type": Reference(prefix="rdf", identifier="type"),
281
+ "is_a": vocabulary.is_a,
282
+ "subPropertyOf": vocabulary.subproperty_of,
283
+ "type": vocabulary.rdf_type,
198
284
  "inverseOf": Reference(prefix="owl", identifier="inverseOf"),
199
285
  }
200
286
 
201
287
 
202
- def _curie_or_uri_to_ref(s: str, converter: Converter) -> Reference:
288
+ def _curie_or_uri_to_ref(s: str, converter: Converter) -> Reference | None:
203
289
  if s in BUILTINS:
204
290
  return BUILTINS[s]
205
- if converter.is_uri(s):
206
- p, o = converter.parse_uri(s)
207
- return Reference(prefix=p, identifier=o)
208
- elif converter.is_curie(s):
209
- pass
210
- raise ValueError(f"can't parse string: {s}")
291
+ reference_tuple = converter.parse(s, strict=False)
292
+ if reference_tuple is not None:
293
+ return reference_tuple.to_pydantic()
294
+ return None
obographs/version.py CHANGED
@@ -12,7 +12,7 @@ __all__ = [
12
12
  "get_version",
13
13
  ]
14
14
 
15
- VERSION = "0.0.2"
15
+ VERSION = "0.0.3"
16
16
 
17
17
 
18
18
  def get_git_hash() -> str:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: obographs
3
- Version: 0.0.2
3
+ Version: 0.0.3
4
4
  Summary: A python data model for OBO Graphs
5
5
  Keywords: snekpack,cookiecutter
6
6
  Author: Charles Tapley Hoyt
@@ -23,7 +23,7 @@ Classifier: Programming Language :: Python :: 3.13
23
23
  Classifier: Programming Language :: Python :: 3 :: Only
24
24
  Classifier: Typing :: Typed
25
25
  Requires-Dist: pydantic
26
- Requires-Dist: curies
26
+ Requires-Dist: curies>=0.10.7
27
27
  Requires-Dist: typing-extensions
28
28
  Requires-Dist: sphinx>=8 ; extra == 'docs'
29
29
  Requires-Dist: sphinx-rtd-theme>=3.0 ; extra == 'docs'
@@ -97,7 +97,7 @@ graph_raw = obographs.read(url)
97
97
  The OBO Graph JSON schema uses non-Pythonic names, and it's inherently not aware
98
98
  of semantics - it uses a combination of URIs and ad-hoc symbols as identifiers.
99
99
  `obographs` implements a standardization workflow that creates new data
100
- structures with parsed/normalized URIs and symbols that has Pythonic nams. Use
100
+ structures with parsed/normalized URIs and symbols that have Pythonic names. Use
101
101
  it like:
102
102
 
103
103
  ```python
@@ -0,0 +1,10 @@
1
+ obographs/version.py,sha256=3709d2674acd39467891048cff0ca525f08edfba747910e241f23b6fb168d2dc,961
2
+ obographs/__init__.py,sha256=0c3d73d035bde44a5375cbc4dffcf314fc89994853777e0048f1307b0fdd53a8,706
3
+ obographs/model.py,sha256=445dbea604eb3d732c691afee71291348a6185750121ac41fcfeb8973b6a219b,7120
4
+ obographs/py.typed,sha256=01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b,1
5
+ obographs/standardized.py,sha256=27955c754e0f077aa8a4c36e9941331e5b8a716bb2cc21295b7ede7917bbc18d,9859
6
+ obographs-0.0.3.dist-info/licenses/LICENSE,sha256=4be0ec343e3bf11fd54321a6b576d5616ebb7d18898f741f63c517209e33bcb2,1076
7
+ obographs-0.0.3.dist-info/WHEEL,sha256=e3765529bb0cc791d07188d72ec6a759d7625ff6d3a5e4b710d25409bae03770,79
8
+ obographs-0.0.3.dist-info/entry_points.txt,sha256=9a9819cedd2186e28d5d42ddce5e3de1417b0db2b07392ff35f9adc7c86a8619,50
9
+ obographs-0.0.3.dist-info/METADATA,sha256=ce84eea275fad376b42901cbd5d93982cae494d0595cea5cbd7ee44748f6631c,13438
10
+ obographs-0.0.3.dist-info/RECORD,,
@@ -1,10 +0,0 @@
1
- obographs/version.py,sha256=04839f3ad0ad481ebb750f5228278ee8a55b26c17bd61c0a3f2ccc6da50204ae,961
2
- obographs/__init__.py,sha256=93807a9cc6f4002ac923a84a8c5e1a9b6301c5368cd61a957c9115adfaef1a56,254
3
- obographs/model.py,sha256=3387390f1a30fe47b22c7c346dfdf2b6a595ccbc7cd8d7d09debf4a13f8f6e6c,6846
4
- obographs/py.typed,sha256=01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b,1
5
- obographs/standardized.py,sha256=15a58dcb9af39a01565fdb096389caf2702f94052e933f9368d93caf620022f5,7099
6
- obographs-0.0.2.dist-info/licenses/LICENSE,sha256=4be0ec343e3bf11fd54321a6b576d5616ebb7d18898f741f63c517209e33bcb2,1076
7
- obographs-0.0.2.dist-info/WHEEL,sha256=e3765529bb0cc791d07188d72ec6a759d7625ff6d3a5e4b710d25409bae03770,79
8
- obographs-0.0.2.dist-info/entry_points.txt,sha256=9a9819cedd2186e28d5d42ddce5e3de1417b0db2b07392ff35f9adc7c86a8619,50
9
- obographs-0.0.2.dist-info/METADATA,sha256=3e577f9dcce8afd6b2b0694935c320b898e5253ba6f0dcf8520b4d1a996ef3a3,13429
10
- obographs-0.0.2.dist-info/RECORD,,