pyobo 0.12.2__py3-none-any.whl → 0.12.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,242 @@
1
+ """Import of OBO Graph JSON."""
2
+
3
+ import logging
4
+ from pathlib import Path
5
+
6
+ import curies
7
+ import obographs
8
+ from curies import Converter
9
+ from curies.vocabulary import SynonymScope, synonym_scopes
10
+ from obographs import (
11
+ Graph,
12
+ NodeType,
13
+ StandardizedGraph,
14
+ StandardizedMeta,
15
+ StandardizedNode,
16
+ StandardizedSynonym,
17
+ )
18
+
19
+ from pyobo import Obo, Reference, StanzaType, Synonym, Term, TypeDef
20
+ from pyobo.identifier_utils import get_converter
21
+ from pyobo.struct import Annotation, OBOLiteral, make_ad_hoc_ontology
22
+ from pyobo.struct import vocabulary as v
23
+ from pyobo.struct.typedef import has_ontology_root_term
24
+
25
+ __all__ = [
26
+ "from_node",
27
+ "from_obograph",
28
+ "from_standardized_graph",
29
+ "read_obograph",
30
+ ]
31
+
32
+ logger = logging.getLogger(__name__)
33
+
34
+
35
def read_obograph(
    prefix: str, path: str | Path, *, converter: Converter | None = None, strict: bool = False
) -> Obo:
    """Load an OBO Graph JSON file and convert it into a PyOBO ontology.

    :param prefix: The prefix to assign to the resulting ontology
    :param path: The location of the OBO Graph JSON file, handed to :func:`obographs.read`
    :param converter: An optional converter, forwarded to :func:`from_obograph`
    :param strict: Forwarded to :func:`from_obograph`
    :returns: The ontology produced by :func:`from_obograph`
    """
    raw_graph = obographs.read(path, squeeze=True)
    return from_obograph(prefix, raw_graph, converter=converter, strict=strict)
41
+
42
+
43
def from_obograph(
    prefix: str, graph: Graph, *, converter: Converter | None = None, strict: bool = False
) -> Obo:
    """Standardize a raw OBO Graph and convert it into a PyOBO ontology.

    :param prefix: The prefix to assign to the resulting ontology
    :param graph: The raw OBO Graph to standardize
    :param converter: The converter used for standardization. If not given,
        the result of :func:`get_converter` is used.
    :param strict: Forwarded to ``graph.standardize``
    :returns: The ontology produced by :func:`from_standardized_graph`
    """
    effective_converter = get_converter() if converter is None else converter
    return from_standardized_graph(
        prefix,
        graph.standardize(effective_converter, strict=strict),
    )
51
+
52
+
53
+ def from_standardized_graph(prefix: str, graph: StandardizedGraph) -> Obo:
54
+ """Generate an OBO data structure from OBO Graph JSON."""
55
+ terms: dict[Reference, Term] = {}
56
+ typedefs: dict[Reference, TypeDef] = {}
57
+ for node in graph.nodes:
58
+ stanza = from_node(node)
59
+ match stanza:
60
+ case Term():
61
+ terms[stanza.reference] = stanza
62
+ case TypeDef():
63
+ typedefs[stanza.reference] = stanza
64
+
65
+ for edge in graph.edges:
66
+ s, p, o = (Reference.from_reference(r) for r in (edge.subject, edge.predicate, edge.object))
67
+ if s in terms:
68
+ stanza = terms[s]
69
+ stanza.append_relationship(p, o)
70
+ elif s in typedefs:
71
+ stanza = typedefs[s]
72
+ stanza.append_relationship(p, o)
73
+
74
+ root_terms: list[Reference] = []
75
+ property_values = []
76
+ auto_generated_by: str | None = None
77
+ if graph.meta:
78
+ for prop in graph.meta.properties or []:
79
+ predicate = Reference.from_reference(prop.predicate)
80
+ if predicate == has_ontology_root_term:
81
+ if isinstance(prop.value, str):
82
+ raise TypeError
83
+ else:
84
+ root_terms.append(Reference.from_reference(prop.value))
85
+ elif predicate == v.obo_autogenerated_by:
86
+ if not isinstance(prop.value, str):
87
+ raise TypeError
88
+ auto_generated_by = prop.value
89
+ # TODO specific subsetdef, imports
90
+ else:
91
+ property_values.append(
92
+ Annotation(
93
+ predicate=predicate,
94
+ # TODO obographs are limited by ability to specify datatype?
95
+ value=OBOLiteral.string(prop.value)
96
+ if isinstance(prop.value, str)
97
+ else Reference.from_reference(prop.value),
98
+ )
99
+ )
100
+
101
+ for equivalent_node_set in graph.equivalent_node_sets:
102
+ equivalent_reference = Reference.from_reference(equivalent_node_set.node)
103
+ if equivalent_reference in terms:
104
+ for equivalent in equivalent_node_set.equivalents:
105
+ terms[equivalent_reference].append_equivalent_to(
106
+ Reference.from_reference(equivalent)
107
+ )
108
+ elif equivalent_reference in typedefs:
109
+ for equivalent in equivalent_node_set.equivalents:
110
+ typedefs[equivalent_reference].append_equivalent_to(
111
+ Reference.from_reference(equivalent)
112
+ )
113
+ else:
114
+ logger.warning(
115
+ "unknown reference node in equivalent_node_set: %s", equivalent_reference.curie
116
+ )
117
+
118
+ for _domain_range_axiom in graph.domain_range_axioms or []:
119
+ p = Reference.from_reference(_domain_range_axiom.predicate)
120
+ if p not in typedefs:
121
+ continue
122
+ # the OBO Graph model allows for multiple ranges
123
+ # or domains, but OBO only one.
124
+ if _domain_range_axiom.ranges:
125
+ typedefs[p].range = Reference.from_reference(_domain_range_axiom.ranges[0])
126
+ if _domain_range_axiom.domains:
127
+ typedefs[p].domain = Reference.from_reference(_domain_range_axiom.domains[0])
128
+
129
+ for _property_chain_axiom in graph.property_chain_axioms:
130
+ p = Reference.from_reference(_property_chain_axiom.predicate)
131
+ if p not in typedefs or not _property_chain_axiom.chain:
132
+ continue
133
+ # TODO check if its also transitive_over and/or equivalent_to_chain
134
+ typedefs[p].holds_over_chain.append(
135
+ [Reference.from_reference(r) for r in _property_chain_axiom.chain]
136
+ )
137
+
138
+ for _logical_definition_axiom in graph.logical_definition_axioms:
139
+ pass # TODO
140
+
141
+ return make_ad_hoc_ontology(
142
+ _ontology=prefix,
143
+ _name=graph.name,
144
+ terms=list(terms.values()),
145
+ _typedefs=list(typedefs.values()),
146
+ _root_terms=root_terms,
147
+ _property_values=property_values,
148
+ _data_version=graph.version or (graph.meta.version_iri if graph.meta is not None else None),
149
+ _auto_generated_by=auto_generated_by,
150
+ )
151
+
152
+
153
#: Lookup table translating OBO Graph JSON node types into OBO stanza types
MAPPING: dict[NodeType, StanzaType] = {
    "CLASS": "Term", "INDIVIDUAL": "Instance", "PROPERTY": "TypeDef",
}
159
+
160
+
161
def from_node(node: StandardizedNode) -> Term | TypeDef:
    """Convert a standardized node into a stanza.

    :param node: The standardized OBO Graph node
    :returns: A typedef if the node's type is ``PROPERTY``, otherwise a term
    """
    builder = _from_property if node.type == "PROPERTY" else _from_term
    return builder(node)
166
+
167
+
168
def _from_term(node: StandardizedNode) -> Term:
    """Build a term from a node, applying the node's ``meta`` if it has one."""
    reference = _get_ref(node)
    # nodes without a type are treated as terms
    stanza_type = MAPPING[node.type] if node.type else "Term"
    term = Term(reference=reference, type=stanza_type)
    if node.meta is None:
        return term
    _process_term_meta(node.meta, term)
    return term
176
+
177
+
178
def _from_property(node: StandardizedNode) -> TypeDef:
    """Build a typedef from a node, applying the node's ``meta`` if it has one."""
    reference = _get_ref(node)
    # annotation properties are flagged as metadata tags
    is_annotation = node.property_type == "ANNOTATION"
    typedef = TypeDef(reference=reference, is_metadata_tag=is_annotation)
    if node.meta is None:
        return typedef
    _process_typedef_meta(node.meta, typedef)
    return typedef
186
+
187
+
188
def _get_ref(node: StandardizedNode) -> Reference:
    """Build a reference from a node's prefix and identifier, named with the node's label."""
    node_reference = node.reference
    return Reference(
        prefix=node_reference.prefix,
        identifier=node_reference.identifier,
        name=node.label,
    )
194
+
195
+
196
+ def _process_term_meta(meta: StandardizedMeta, term: Term) -> None:
197
+ """Process the ``meta`` object associated with a term node."""
198
+ if meta.definition:
199
+ term.definition = meta.definition.value
200
+ for definition_xref in meta.definition.xrefs or []:
201
+ term.append_definition_xref(definition_xref)
202
+
203
+ if meta.subsets:
204
+ term.subsets.extend(Reference.from_reference(r) for r in meta.subsets)
205
+
206
+ for xref in meta.xrefs or []:
207
+ term.append_xref(xref.reference)
208
+
209
+ for synonym in meta.synonyms or []:
210
+ if s := _from_synonym(synonym):
211
+ term.append_synonym(s)
212
+
213
+ for comment in meta.comments or []:
214
+ term.append_comment(comment)
215
+
216
+ if meta.deprecated:
217
+ term.is_obsolete = True
218
+
219
+ for prop in meta.properties or []:
220
+ match prop.value:
221
+ case Reference():
222
+ term.annotate_object(prop.predicate, prop.value)
223
+ case str():
224
+ # note, OBO Graph format does not allow for annotating data type
225
+ term.annotate_literal(prop.predicate, OBOLiteral.string(prop.value))
226
+
227
+
228
#: Reverse lookup from a synonym scope's reference back to the scope itself
REV_SYNONYM_SCOPE: dict[curies.Reference, SynonymScope] = {
    reference: scope for scope, reference in synonym_scopes.items()
}
229
+
230
+
231
def _from_synonym(syn: StandardizedSynonym) -> Synonym | None:
    """Convert a standardized synonym into a PyOBO synonym.

    :param syn: The standardized synonym from the node's metadata
    :returns: The converted synonym, or ``None`` if the synonym's predicate
        is not one of the known synonym scopes
    """
    specificity = REV_SYNONYM_SCOPE.get(syn.predicate)
    if specificity is None:
        # The caller already skips falsy results, so report and skip
        # instead of failing the whole import with a KeyError.
        logger.warning("skipping synonym with unknown scope predicate: %s", syn.predicate)
        return None
    return Synonym(
        name=syn.text,
        specificity=specificity,
        type=Reference.from_reference(syn.type) if syn.type is not None else None,
        provenance=[Reference.from_reference(r) for r in syn.xrefs or []],
    )
238
+
239
+
240
def _process_typedef_meta(meta: StandardizedMeta, typedef: TypeDef) -> None:
    """Process the ``meta`` object associated with a property node.

    This is currently a placeholder; nothing from ``meta`` is copied
    onto ``typedef`` yet.
    """
    # TODO everything else is in here
    return None
@@ -0,0 +1,47 @@
1
+ """Testing utilities."""
2
+
3
+ import unittest
4
+ from typing import cast
5
+
6
+ from curies import Reference
7
+ from obographs import StandardizedGraph, StandardizedMeta
8
+
9
+ __all__ = [
10
+ "assert_graph_equal",
11
+ ]
12
+
13
+
14
def assert_graph_equal(
    test_case: unittest.TestCase, expected: StandardizedGraph, actual: StandardizedGraph
) -> None:
    """Assert that two standardized graphs are equal.

    The metadata is compared only when ``expected`` has some. Nodes are compared
    by CURIE, edges by string triple, and all remaining fields via their dumps.
    The node references of ``actual`` are replaced in place before comparison.

    :param test_case: The test case whose assertion methods are used
    :param expected: The graph holding the expected content
    :param actual: The graph under test
    """
    dump_options = {"exclude_unset": True, "exclude_none": True, "exclude_defaults": True}

    if expected.meta is not None:
        test_case.assertIsNotNone(actual.meta)
        expected_meta_dump = expected.meta.model_dump(**dump_options)
        actual_meta_dump = cast(StandardizedMeta, actual.meta).model_dump(**dump_options)
        test_case.assertEqual(expected_meta_dump, actual_meta_dump)

    # strip out extra info
    for actual_node in actual.nodes:
        actual_node.reference = Reference.from_reference(actual_node.reference)

    expected_nodes = {n.reference.curie: n for n in expected.nodes}
    actual_nodes = {n.reference.curie: n for n in actual.nodes}
    test_case.assertEqual(expected_nodes, actual_nodes)

    expected_edges = {e.as_str_triple(): e for e in expected.edges}
    actual_edges = {e.as_str_triple(): e for e in actual.edges}
    test_case.assertEqual(expected_edges, actual_edges)

    # everything already compared above is left out of the final dump comparison
    excludes = {"nodes", "edges", "meta"}
    expected_rest = expected.model_dump(exclude=excludes, **dump_options)
    actual_rest = actual.model_dump(exclude=excludes, **dump_options)
    test_case.assertEqual(expected_rest, actual_rest)
pyobo/struct/struct.py CHANGED
@@ -22,7 +22,7 @@ import curies
22
22
  import networkx as nx
23
23
  import pandas as pd
24
24
  import ssslm
25
- from curies import ReferenceTuple
25
+ from curies import Converter, ReferenceTuple
26
26
  from curies import vocabulary as _cv
27
27
  from more_click import force_option, verbose_option
28
28
  from tqdm.auto import tqdm
@@ -84,6 +84,7 @@ __all__ = [
84
84
  "Synonym",
85
85
  "SynonymTypeDef",
86
86
  "Term",
87
+ "TypeDef",
87
88
  "abbreviation",
88
89
  "acronym",
89
90
  "make_ad_hoc_ontology",
@@ -91,9 +92,6 @@ __all__ = [
91
92
 
92
93
  logger = logging.getLogger(__name__)
93
94
 
94
- #: This is what happens if no specificity is given
95
- DEFAULT_SPECIFICITY: _cv.SynonymScope = "RELATED"
96
-
97
95
  #: Columns in the SSSOM dataframe
98
96
  SSSOM_DF_COLUMNS = [
99
97
  "subject_id",
@@ -104,7 +102,6 @@ SSSOM_DF_COLUMNS = [
104
102
  "confidence",
105
103
  "contributor",
106
104
  ]
107
- UNSPECIFIED_MATCHING_CURIE = "sempav:UnspecifiedMatching"
108
105
  FORMAT_VERSION = "1.4"
109
106
 
110
107
 
@@ -153,14 +150,14 @@ class Synonym(HasReferencesMixin):
153
150
  def _sort_key(self) -> tuple[str, _cv.SynonymScope, str]:
154
151
  return (
155
152
  self.name,
156
- self.specificity or DEFAULT_SPECIFICITY,
153
+ self.specificity or _cv.DEFAULT_SYNONYM_SCOPE,
157
154
  self.type.curie if self.type else "",
158
155
  )
159
156
 
160
157
  @property
161
158
  def predicate(self) -> curies.NamedReference:
162
159
  """Get the specificity reference."""
163
- return _cv.synonym_scopes[self.specificity or DEFAULT_SPECIFICITY]
160
+ return _cv.synonym_scopes[self.specificity or _cv.DEFAULT_SYNONYM_SCOPE]
164
161
 
165
162
  def to_obo(
166
163
  self,
@@ -189,7 +186,7 @@ class Synonym(HasReferencesMixin):
189
186
  elif self.type is not None:
190
187
  # it's not valid to have a synonym type without a specificity,
191
188
  # so automatically assign one if we'll need it
192
- x = f"{x} {DEFAULT_SPECIFICITY}"
189
+ x = f"{x} {_cv.DEFAULT_SYNONYM_SCOPE}"
193
190
 
194
191
  # Add on the synonym type, if exists
195
192
  if self.type is not None:
@@ -429,9 +426,8 @@ class Term(Stanza):
429
426
  if self.definition:
430
427
  yield f"def: {self._definition_fp()}"
431
428
  # 7
432
- for x in self.get_property_values(v.comment):
433
- if isinstance(x, OBOLiteral):
434
- yield f'comment: "{x.value}"'
429
+ for comment in self.get_comments():
430
+ yield f'comment: "{comment}"'
435
431
  # 8
436
432
  yield from _reference_list_tag("subset", self.subsets, ontology_prefix)
437
433
  # 9
@@ -723,17 +719,11 @@ class Obo:
723
719
  """Iterate over terms in this ontology."""
724
720
  raise NotImplementedError
725
721
 
726
- def get_graph(self):
727
- """Get an OBO Graph object."""
728
- from ..obographs import graph_from_obo
729
-
730
- return graph_from_obo(self)
731
-
732
- def write_obograph(self, path: str | Path) -> None:
722
+ def write_obograph(self, path: str | Path, *, converter: Converter | None = None) -> None:
733
723
  """Write OBO Graph json."""
734
- graph = self.get_graph()
735
- with safe_open(path, read=False) as file:
736
- file.write(graph.model_dump_json(indent=2, exclude_none=True, exclude_unset=True))
724
+ from . import obograph
725
+
726
+ obograph.write_obograph(self, path, converter=converter)
737
727
 
738
728
  @classmethod
739
729
  def cli(cls, *args, default_rewrite: bool = False) -> Any:
@@ -1642,13 +1632,13 @@ class Obo:
1642
1632
  #############
1643
1633
 
1644
1634
  def iterate_edges(
1645
- self, *, use_tqdm: bool = False
1635
+ self, *, use_tqdm: bool = False, include_xrefs: bool = True
1646
1636
  ) -> Iterable[tuple[Stanza, TypeDef, Reference]]:
1647
1637
  """Iterate over triples of terms, relations, and their targets."""
1648
1638
  _warned: set[ReferenceTuple] = set()
1649
1639
  typedefs = self._index_typedefs()
1650
1640
  for stanza in self._iter_stanzas(use_tqdm=use_tqdm, desc=f"[{self.ontology}] edge"):
1651
- for predicate, reference in stanza._iter_edges():
1641
+ for predicate, reference in stanza._iter_edges(include_xrefs=include_xrefs):
1652
1642
  if td := self._get_typedef(stanza, predicate, _warned, typedefs):
1653
1643
  yield stanza, td, reference
1654
1644
 
@@ -231,6 +231,8 @@ class Stanza(Referenced, HasReferencesMixin):
231
231
  ) -> None:
232
232
  self._axioms[_property_resolve(p, o)].append(annotation)
233
233
 
234
+ # TODO check different usages of this
235
+
234
236
  def append_equivalent(
235
237
  self,
236
238
  reference: ReferenceHint,
@@ -242,6 +244,15 @@ class Stanza(Referenced, HasReferencesMixin):
242
244
  stanza_type_to_eq_prop[self.type], reference, annotations=annotations
243
245
  )
244
246
 
247
def append_equivalent_to(
    self, reference: ReferenceHint, *, annotations: Iterable[Annotation] | None = None
) -> Self:
    """Append to the "equivalent to" list.

    :param reference: The equivalent entity, normalized with ``_ensure_ref``
    :param annotations: Optional annotations to attach to the equivalence
    :returns: This stanza, so calls can be chained
    """
    target = _ensure_ref(reference)
    self.equivalent_to.append(target)
    # the equivalence predicate depends on the type of this stanza
    predicate = stanza_type_to_eq_prop[self.type]
    self._extend_annotations(predicate, target, annotations)
    return self
255
+
245
256
  def append_xref(
246
257
  self,
247
258
  reference: ReferenceHint,
@@ -317,15 +328,6 @@ class Stanza(Referenced, HasReferencesMixin):
317
328
  self.union_of.append(_ensure_ref(reference))
318
329
  return self
319
330
 
320
- def append_equivalent_to(
321
- self, reference: ReferenceHint, *, annotations: Iterable[Annotation] | None = None
322
- ) -> Self:
323
- """Append to the "equivalent to" list."""
324
- reference = _ensure_ref(reference)
325
- self.equivalent_to.append(reference)
326
- self._extend_annotations(stanza_type_to_eq_prop[self.type], reference, annotations)
327
- return self
328
-
329
331
  def _iterate_intersection_of_obo(self, *, ontology_prefix: str) -> Iterable[str]:
330
332
  for element in sorted(self.intersection_of, key=self._intersection_of_key):
331
333
  match element:
@@ -679,14 +681,18 @@ class Stanza(Referenced, HasReferencesMixin):
679
681
  """Add a comment property."""
680
682
  return self.annotate_string(v.comment, value, annotations=annotations, language=language)
681
683
 
684
def get_comments(self) -> list[str]:
    """Get the values of all comment properties that are literals."""
    comments: list[str] = []
    for value in self.get_property_values(v.comment):
        if isinstance(value, OBOLiteral):
            comments.append(value.value)
    return comments
687
+
682
688
  @property
683
689
  def alt_ids(self) -> Sequence[Reference]:
684
690
  """Get alternative terms."""
685
691
  return tuple(self.get_property_objects(v.alternative_term))
686
692
 
687
- def get_edges(self) -> list[tuple[Reference, Reference]]:
693
+ def get_edges(self, *, include_xrefs: bool = True) -> list[tuple[Reference, Reference]]:
688
694
  """Get edges."""
689
- return list(self._iter_edges())
695
+ return list(self._iter_edges(include_xrefs=include_xrefs))
690
696
 
691
697
  def _iter_parents(self) -> Iterable[tuple[Reference, Reference]]:
692
698
  parent_prop = stanza_type_to_prop[self.type]
@@ -702,7 +708,7 @@ class Stanza(Referenced, HasReferencesMixin):
702
708
  case (predicate, target):
703
709
  yield predicate, target
704
710
 
705
- def _iter_edges(self) -> Iterable[tuple[Reference, Reference]]:
711
+ def _iter_edges(self, *, include_xrefs: bool = True) -> Iterable[tuple[Reference, Reference]]:
706
712
  # The following are "object" properties, meaning
707
713
  # they're part of the definition of the object
708
714
  yield from self.iterate_relations()
@@ -715,8 +721,10 @@ class Stanza(Referenced, HasReferencesMixin):
715
721
  for subset in self.subsets:
716
722
  yield v.in_subset, subset
717
723
  yield from self.iterate_object_properties()
718
- for xref_reference in self.xrefs:
719
- yield v.has_dbxref, xref_reference
724
+
725
+ if include_xrefs:
726
+ for xref_reference in self.xrefs:
727
+ yield v.has_dbxref, xref_reference
720
728
 
721
729
  # TODO disjoint_from
722
730
 
pyobo/struct/typedef.py CHANGED
@@ -294,6 +294,10 @@ has_end_date = TypeDef(
294
294
  has_title = TypeDef(reference=v.has_title, is_metadata_tag=True)
295
295
  has_license = TypeDef(reference=v.has_license, is_metadata_tag=True)
296
296
  has_description = TypeDef(reference=v.has_description, is_metadata_tag=True)
297
+ obo_autogenerated_by = TypeDef(reference=v.obo_autogenerated_by, is_metadata_tag=True)
298
+ obo_has_format_version = TypeDef(reference=v.obo_has_format_version, is_metadata_tag=True)
299
+ obo_is_metadata_tag = TypeDef(reference=v.obo_is_metadata_tag, is_metadata_tag=True)
300
+ obo_has_id = TypeDef(reference=v.obo_has_id, is_metadata_tag=True)
297
301
 
298
302
  in_subset = TypeDef(reference=v.in_subset, is_metadata_tag=True)
299
303
  has_term_editor = TypeDef(reference=v.has_term_editor, is_metadata_tag=True)
@@ -47,6 +47,13 @@ has_dbxref = _c(_v.has_dbxref)
47
47
 
48
48
  in_subset = _c(_v.obo_in_subset)
49
49
  has_obo_namespace = _c(_v.obo_has_namespace)
50
+ obo_is_metadata_tag = Reference(
51
+ prefix="oboinowl", identifier="is_metadata_tag", name="is metadata tag"
52
+ )
53
+ obo_has_id = Reference(prefix="oboinowl", identifier="id", name="has ID")
54
+ obo_has_format_version = Reference(
55
+ prefix="oboinowl", identifier="hasOBOFormatVersion", name="has OBO format version"
56
+ )
50
57
  obo_autogenerated_by = _c(_v.obo_autogenerated_by)
51
58
  obo_creation_date = _c(_v.obo_creation_date)
52
59
 
pyobo/version.py CHANGED
@@ -12,7 +12,7 @@ __all__ = [
12
12
  "get_version",
13
13
  ]
14
14
 
15
- VERSION = "0.12.2"
15
+ VERSION = "0.12.4"
16
16
 
17
17
 
18
18
  def get_git_hash() -> str:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pyobo
3
- Version: 0.12.2
3
+ Version: 0.12.4
4
4
  Summary: A python package for handling and generating OBO
5
5
  Keywords: snekpack,cookiecutter,ontologies,biomedical ontologies,life sciences,natural sciences,bioinformatics,cheminformatics,Open Biomedical Ontologies,OBO
6
6
  Author: Charles Tapley Hoyt
@@ -45,7 +45,7 @@ Requires-Dist: zenodo-client>=0.3.6
45
45
  Requires-Dist: class-resolver>=0.6.0
46
46
  Requires-Dist: psycopg2-binary
47
47
  Requires-Dist: pydantic>=2.0
48
- Requires-Dist: curies>=0.10.13
48
+ Requires-Dist: curies>=0.10.17
49
49
  Requires-Dist: curies-processing>=0.1.0
50
50
  Requires-Dist: python-dateutil
51
51
  Requires-Dist: networkx>=3.4
@@ -56,6 +56,7 @@ Requires-Dist: clinicaltrials-downloader>=0.0.2
56
56
  Requires-Dist: nih-reporter-downloader>=0.0.1
57
57
  Requires-Dist: typing-extensions
58
58
  Requires-Dist: rdflib
59
+ Requires-Dist: obographs>=0.0.8
59
60
  Requires-Dist: ssslm[gilda] ; extra == 'gilda'
60
61
  Requires-Dist: ssslm[gilda-slim] ; extra == 'gilda-slim'
61
62
  Maintainer: Charles Tapley Hoyt