pyobo 0.12.1__py3-none-any.whl → 0.12.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/__init__.py +12 -4
- pyobo/getters.py +11 -3
- pyobo/identifier_utils/__init__.py +4 -1
- pyobo/identifier_utils/api.py +4 -3
- pyobo/sources/__init__.py +2 -0
- pyobo/sources/credit.py +17 -6
- pyobo/sources/drugbank/drugbank.py +1 -1
- pyobo/sources/gwascentral/gwascentral_study.py +1 -1
- pyobo/sources/intact.py +79 -0
- pyobo/struct/__init__.py +2 -1
- pyobo/struct/functional/ontology.py +2 -2
- pyobo/struct/obo/__init__.py +9 -0
- pyobo/{reader.py → struct/obo/reader.py} +21 -18
- pyobo/struct/obograph/__init__.py +16 -0
- pyobo/struct/obograph/export.py +315 -0
- pyobo/struct/obograph/reader.py +242 -0
- pyobo/struct/obograph/utils.py +47 -0
- pyobo/struct/struct.py +13 -23
- pyobo/struct/struct_utils.py +22 -14
- pyobo/struct/typedef.py +4 -0
- pyobo/struct/vocabulary.py +7 -0
- pyobo/version.py +1 -1
- {pyobo-0.12.1.dist-info → pyobo-0.12.3.dist-info}/METADATA +5 -16
- {pyobo-0.12.1.dist-info → pyobo-0.12.3.dist-info}/RECORD +171 -170
- {pyobo-0.12.1.dist-info → pyobo-0.12.3.dist-info}/WHEEL +1 -1
- pyobo/identifier_utils/preprocessing.json +0 -873
- pyobo/identifier_utils/preprocessing.py +0 -27
- pyobo/obographs.py +0 -147
- pyobo/resources/goc.py +0 -75
- pyobo/resources/goc.tsv +0 -188
- /pyobo/{reader_utils.py → struct/obo/reader_utils.py} +0 -0
- {pyobo-0.12.1.dist-info → pyobo-0.12.3.dist-info}/entry_points.txt +0 -0
- {pyobo-0.12.1.dist-info → pyobo-0.12.3.dist-info}/licenses/LICENSE +0 -0
pyobo/struct/struct.py
CHANGED
|
@@ -22,7 +22,7 @@ import curies
|
|
|
22
22
|
import networkx as nx
|
|
23
23
|
import pandas as pd
|
|
24
24
|
import ssslm
|
|
25
|
-
from curies import ReferenceTuple
|
|
25
|
+
from curies import Converter, ReferenceTuple
|
|
26
26
|
from curies import vocabulary as _cv
|
|
27
27
|
from more_click import force_option, verbose_option
|
|
28
28
|
from tqdm.auto import tqdm
|
|
@@ -84,6 +84,7 @@ __all__ = [
|
|
|
84
84
|
"Synonym",
|
|
85
85
|
"SynonymTypeDef",
|
|
86
86
|
"Term",
|
|
87
|
+
"TypeDef",
|
|
87
88
|
"abbreviation",
|
|
88
89
|
"acronym",
|
|
89
90
|
"make_ad_hoc_ontology",
|
|
@@ -91,9 +92,6 @@ __all__ = [
|
|
|
91
92
|
|
|
92
93
|
logger = logging.getLogger(__name__)
|
|
93
94
|
|
|
94
|
-
#: This is what happens if no specificity is given
|
|
95
|
-
DEFAULT_SPECIFICITY: _cv.SynonymScope = "RELATED"
|
|
96
|
-
|
|
97
95
|
#: Columns in the SSSOM dataframe
|
|
98
96
|
SSSOM_DF_COLUMNS = [
|
|
99
97
|
"subject_id",
|
|
@@ -104,7 +102,6 @@ SSSOM_DF_COLUMNS = [
|
|
|
104
102
|
"confidence",
|
|
105
103
|
"contributor",
|
|
106
104
|
]
|
|
107
|
-
UNSPECIFIED_MATCHING_CURIE = "sempav:UnspecifiedMatching"
|
|
108
105
|
FORMAT_VERSION = "1.4"
|
|
109
106
|
|
|
110
107
|
|
|
@@ -153,14 +150,14 @@ class Synonym(HasReferencesMixin):
|
|
|
153
150
|
def _sort_key(self) -> tuple[str, _cv.SynonymScope, str]:
|
|
154
151
|
return (
|
|
155
152
|
self.name,
|
|
156
|
-
self.specificity or DEFAULT_SPECIFICITY,
|
|
153
|
+
self.specificity or _cv.DEFAULT_SYNONYM_SCOPE,
|
|
157
154
|
self.type.curie if self.type else "",
|
|
158
155
|
)
|
|
159
156
|
|
|
160
157
|
@property
|
|
161
158
|
def predicate(self) -> curies.NamedReference:
|
|
162
159
|
"""Get the specificity reference."""
|
|
163
|
-
return _cv.synonym_scopes[self.specificity or DEFAULT_SPECIFICITY]
|
|
160
|
+
return _cv.synonym_scopes[self.specificity or _cv.DEFAULT_SYNONYM_SCOPE]
|
|
164
161
|
|
|
165
162
|
def to_obo(
|
|
166
163
|
self,
|
|
@@ -189,7 +186,7 @@ class Synonym(HasReferencesMixin):
|
|
|
189
186
|
elif self.type is not None:
|
|
190
187
|
# it's not valid to have a synonym type without a specificity,
|
|
191
188
|
# so automatically assign one if we'll need it
|
|
192
|
-
x = f"{x} {DEFAULT_SPECIFICITY}"
|
|
189
|
+
x = f"{x} {_cv.DEFAULT_SYNONYM_SCOPE}"
|
|
193
190
|
|
|
194
191
|
# Add on the synonym type, if exists
|
|
195
192
|
if self.type is not None:
|
|
@@ -429,9 +426,8 @@ class Term(Stanza):
|
|
|
429
426
|
if self.definition:
|
|
430
427
|
yield f"def: {self._definition_fp()}"
|
|
431
428
|
# 7
|
|
432
|
-
for
|
|
433
|
-
|
|
434
|
-
yield f'comment: "{x.value}"'
|
|
429
|
+
for comment in self.get_comments():
|
|
430
|
+
yield f'comment: "{comment}"'
|
|
435
431
|
# 8
|
|
436
432
|
yield from _reference_list_tag("subset", self.subsets, ontology_prefix)
|
|
437
433
|
# 9
|
|
@@ -723,17 +719,11 @@ class Obo:
|
|
|
723
719
|
"""Iterate over terms in this ontology."""
|
|
724
720
|
raise NotImplementedError
|
|
725
721
|
|
|
726
|
-
def
|
|
727
|
-
"""Get an OBO Graph object."""
|
|
728
|
-
from ..obographs import graph_from_obo
|
|
729
|
-
|
|
730
|
-
return graph_from_obo(self)
|
|
731
|
-
|
|
732
|
-
def write_obograph(self, path: str | Path) -> None:
|
|
722
|
+
def write_obograph(self, path: str | Path, *, converter: Converter | None = None) -> None:
|
|
733
723
|
"""Write OBO Graph json."""
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
724
|
+
from . import obograph
|
|
725
|
+
|
|
726
|
+
obograph.write_obograph(self, path, converter=converter)
|
|
737
727
|
|
|
738
728
|
@classmethod
|
|
739
729
|
def cli(cls, *args, default_rewrite: bool = False) -> Any:
|
|
@@ -1642,13 +1632,13 @@ class Obo:
|
|
|
1642
1632
|
#############
|
|
1643
1633
|
|
|
1644
1634
|
def iterate_edges(
|
|
1645
|
-
self, *, use_tqdm: bool = False
|
|
1635
|
+
self, *, use_tqdm: bool = False, include_xrefs: bool = True
|
|
1646
1636
|
) -> Iterable[tuple[Stanza, TypeDef, Reference]]:
|
|
1647
1637
|
"""Iterate over triples of terms, relations, and their targets."""
|
|
1648
1638
|
_warned: set[ReferenceTuple] = set()
|
|
1649
1639
|
typedefs = self._index_typedefs()
|
|
1650
1640
|
for stanza in self._iter_stanzas(use_tqdm=use_tqdm, desc=f"[{self.ontology}] edge"):
|
|
1651
|
-
for predicate, reference in stanza._iter_edges():
|
|
1641
|
+
for predicate, reference in stanza._iter_edges(include_xrefs=include_xrefs):
|
|
1652
1642
|
if td := self._get_typedef(stanza, predicate, _warned, typedefs):
|
|
1653
1643
|
yield stanza, td, reference
|
|
1654
1644
|
|
pyobo/struct/struct_utils.py
CHANGED
|
@@ -231,6 +231,8 @@ class Stanza(Referenced, HasReferencesMixin):
|
|
|
231
231
|
) -> None:
|
|
232
232
|
self._axioms[_property_resolve(p, o)].append(annotation)
|
|
233
233
|
|
|
234
|
+
# TODO check different usages of this
|
|
235
|
+
|
|
234
236
|
def append_equivalent(
|
|
235
237
|
self,
|
|
236
238
|
reference: ReferenceHint,
|
|
@@ -242,6 +244,15 @@ class Stanza(Referenced, HasReferencesMixin):
|
|
|
242
244
|
stanza_type_to_eq_prop[self.type], reference, annotations=annotations
|
|
243
245
|
)
|
|
244
246
|
|
|
247
|
+
def append_equivalent_to(
|
|
248
|
+
self, reference: ReferenceHint, *, annotations: Iterable[Annotation] | None = None
|
|
249
|
+
) -> Self:
|
|
250
|
+
"""Append to the "equivalent to" list."""
|
|
251
|
+
reference = _ensure_ref(reference)
|
|
252
|
+
self.equivalent_to.append(reference)
|
|
253
|
+
self._extend_annotations(stanza_type_to_eq_prop[self.type], reference, annotations)
|
|
254
|
+
return self
|
|
255
|
+
|
|
245
256
|
def append_xref(
|
|
246
257
|
self,
|
|
247
258
|
reference: ReferenceHint,
|
|
@@ -317,15 +328,6 @@ class Stanza(Referenced, HasReferencesMixin):
|
|
|
317
328
|
self.union_of.append(_ensure_ref(reference))
|
|
318
329
|
return self
|
|
319
330
|
|
|
320
|
-
def append_equivalent_to(
|
|
321
|
-
self, reference: ReferenceHint, *, annotations: Iterable[Annotation] | None = None
|
|
322
|
-
) -> Self:
|
|
323
|
-
"""Append to the "equivalent to" list."""
|
|
324
|
-
reference = _ensure_ref(reference)
|
|
325
|
-
self.equivalent_to.append(reference)
|
|
326
|
-
self._extend_annotations(stanza_type_to_eq_prop[self.type], reference, annotations)
|
|
327
|
-
return self
|
|
328
|
-
|
|
329
331
|
def _iterate_intersection_of_obo(self, *, ontology_prefix: str) -> Iterable[str]:
|
|
330
332
|
for element in sorted(self.intersection_of, key=self._intersection_of_key):
|
|
331
333
|
match element:
|
|
@@ -679,14 +681,18 @@ class Stanza(Referenced, HasReferencesMixin):
|
|
|
679
681
|
"""Add a comment property."""
|
|
680
682
|
return self.annotate_string(v.comment, value, annotations=annotations, language=language)
|
|
681
683
|
|
|
684
|
+
def get_comments(self) -> list[str]:
|
|
685
|
+
"""Get all comment strings."""
|
|
686
|
+
return [x.value for x in self.get_property_values(v.comment) if isinstance(x, OBOLiteral)]
|
|
687
|
+
|
|
682
688
|
@property
|
|
683
689
|
def alt_ids(self) -> Sequence[Reference]:
|
|
684
690
|
"""Get alternative terms."""
|
|
685
691
|
return tuple(self.get_property_objects(v.alternative_term))
|
|
686
692
|
|
|
687
|
-
def get_edges(self) -> list[tuple[Reference, Reference]]:
|
|
693
|
+
def get_edges(self, *, include_xrefs: bool = True) -> list[tuple[Reference, Reference]]:
|
|
688
694
|
"""Get edges."""
|
|
689
|
-
return list(self._iter_edges())
|
|
695
|
+
return list(self._iter_edges(include_xrefs=include_xrefs))
|
|
690
696
|
|
|
691
697
|
def _iter_parents(self) -> Iterable[tuple[Reference, Reference]]:
|
|
692
698
|
parent_prop = stanza_type_to_prop[self.type]
|
|
@@ -702,7 +708,7 @@ class Stanza(Referenced, HasReferencesMixin):
|
|
|
702
708
|
case (predicate, target):
|
|
703
709
|
yield predicate, target
|
|
704
710
|
|
|
705
|
-
def _iter_edges(self) -> Iterable[tuple[Reference, Reference]]:
|
|
711
|
+
def _iter_edges(self, *, include_xrefs: bool = True) -> Iterable[tuple[Reference, Reference]]:
|
|
706
712
|
# The following are "object" properties, meaning
|
|
707
713
|
# they're part of the definition of the object
|
|
708
714
|
yield from self.iterate_relations()
|
|
@@ -715,8 +721,10 @@ class Stanza(Referenced, HasReferencesMixin):
|
|
|
715
721
|
for subset in self.subsets:
|
|
716
722
|
yield v.in_subset, subset
|
|
717
723
|
yield from self.iterate_object_properties()
|
|
718
|
-
|
|
719
|
-
|
|
724
|
+
|
|
725
|
+
if include_xrefs:
|
|
726
|
+
for xref_reference in self.xrefs:
|
|
727
|
+
yield v.has_dbxref, xref_reference
|
|
720
728
|
|
|
721
729
|
# TODO disjoint_from
|
|
722
730
|
|
pyobo/struct/typedef.py
CHANGED
|
@@ -294,6 +294,10 @@ has_end_date = TypeDef(
|
|
|
294
294
|
has_title = TypeDef(reference=v.has_title, is_metadata_tag=True)
|
|
295
295
|
has_license = TypeDef(reference=v.has_license, is_metadata_tag=True)
|
|
296
296
|
has_description = TypeDef(reference=v.has_description, is_metadata_tag=True)
|
|
297
|
+
obo_autogenerated_by = TypeDef(reference=v.obo_autogenerated_by, is_metadata_tag=True)
|
|
298
|
+
obo_has_format_version = TypeDef(reference=v.obo_has_format_version, is_metadata_tag=True)
|
|
299
|
+
obo_is_metadata_tag = TypeDef(reference=v.obo_is_metadata_tag, is_metadata_tag=True)
|
|
300
|
+
obo_has_id = TypeDef(reference=v.obo_has_id, is_metadata_tag=True)
|
|
297
301
|
|
|
298
302
|
in_subset = TypeDef(reference=v.in_subset, is_metadata_tag=True)
|
|
299
303
|
has_term_editor = TypeDef(reference=v.has_term_editor, is_metadata_tag=True)
|
pyobo/struct/vocabulary.py
CHANGED
|
@@ -47,6 +47,13 @@ has_dbxref = _c(_v.has_dbxref)
|
|
|
47
47
|
|
|
48
48
|
in_subset = _c(_v.obo_in_subset)
|
|
49
49
|
has_obo_namespace = _c(_v.obo_has_namespace)
|
|
50
|
+
obo_is_metadata_tag = Reference(
|
|
51
|
+
prefix="oboinowl", identifier="is_metadata_tag", name="is metadata tag"
|
|
52
|
+
)
|
|
53
|
+
obo_has_id = Reference(prefix="oboinowl", identifier="id", name="has ID")
|
|
54
|
+
obo_has_format_version = Reference(
|
|
55
|
+
prefix="oboinowl", identifier="hasOBOFormatVersion", name="has OBO format version"
|
|
56
|
+
)
|
|
50
57
|
obo_autogenerated_by = _c(_v.obo_autogenerated_by)
|
|
51
58
|
obo_creation_date = _c(_v.obo_creation_date)
|
|
52
59
|
|
pyobo/version.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pyobo
|
|
3
|
-
Version: 0.12.1
|
|
3
|
+
Version: 0.12.3
|
|
4
4
|
Summary: A python package for handling and generating OBO
|
|
5
5
|
Keywords: snekpack,cookiecutter,ontologies,biomedical ontologies,life sciences,natural sciences,bioinformatics,cheminformatics,Open Biomedical Ontologies,OBO
|
|
6
6
|
Author: Charles Tapley Hoyt
|
|
@@ -39,13 +39,14 @@ Requires-Dist: cachier
|
|
|
39
39
|
Requires-Dist: pystow>=0.7.0
|
|
40
40
|
Requires-Dist: bioversions>=0.8.0
|
|
41
41
|
Requires-Dist: bioregistry>=0.12.7
|
|
42
|
-
Requires-Dist: bioontologies>=0.7.
|
|
42
|
+
Requires-Dist: bioontologies>=0.7.2
|
|
43
43
|
Requires-Dist: ssslm>=0.0.13
|
|
44
44
|
Requires-Dist: zenodo-client>=0.3.6
|
|
45
45
|
Requires-Dist: class-resolver>=0.6.0
|
|
46
46
|
Requires-Dist: psycopg2-binary
|
|
47
47
|
Requires-Dist: pydantic>=2.0
|
|
48
|
-
Requires-Dist: curies>=0.10.
|
|
48
|
+
Requires-Dist: curies>=0.10.17
|
|
49
|
+
Requires-Dist: curies-processing>=0.1.0
|
|
49
50
|
Requires-Dist: python-dateutil
|
|
50
51
|
Requires-Dist: networkx>=3.4
|
|
51
52
|
Requires-Dist: drugbank-downloader
|
|
@@ -55,6 +56,7 @@ Requires-Dist: clinicaltrials-downloader>=0.0.2
|
|
|
55
56
|
Requires-Dist: nih-reporter-downloader>=0.0.1
|
|
56
57
|
Requires-Dist: typing-extensions
|
|
57
58
|
Requires-Dist: rdflib
|
|
59
|
+
Requires-Dist: obographs>=0.0.8
|
|
58
60
|
Requires-Dist: ssslm[gilda] ; extra == 'gilda'
|
|
59
61
|
Requires-Dist: ssslm[gilda-slim] ; extra == 'gilda-slim'
|
|
60
62
|
Maintainer: Charles Tapley Hoyt
|
|
@@ -395,19 +397,6 @@ class MyTestCase(unittest.TestCase):
|
|
|
395
397
|
pyobo.get_name("chebi", "1234")
|
|
396
398
|
```
|
|
397
399
|
|
|
398
|
-
## Preprocessing CURIEs, URIs, and unqualified identifiers
|
|
399
|
-
|
|
400
|
-
In order to normalize references and identify resources, PyOBO uses the
|
|
401
|
-
[Bioregistry](https://github.com/bioregistry/bioregistry). It used to be a part
|
|
402
|
-
of PyOBO, but has since been externalized for more general reuse.
|
|
403
|
-
|
|
404
|
-
At
|
|
405
|
-
[src/pyobo/identifier_utils/preprocessing.json](https://github.com/pyobo/pyobo/blob/master/src/pyobo/src/pyobo/identifier_utils/preprocessing.json)
|
|
406
|
-
is the curated set of pre-processing rules. These are used in combination with
|
|
407
|
-
the `curies` package to do pre-processing steps on CURIEs, URIs, and unqualified
|
|
408
|
-
identifiers beyond what is possible with the Bioregistry. See
|
|
409
|
-
https://curies.readthedocs.io/en/latest/preprocessing.html.
|
|
410
|
-
|
|
411
400
|
## Troubleshooting
|
|
412
401
|
|
|
413
402
|
The OBO Foundry seems to be pretty unstable with respect to the URLs to OBO
|