pyobo 0.10.11__py3-none-any.whl → 0.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyobo/__init__.py +0 -2
- pyobo/__main__.py +0 -2
- pyobo/api/__init__.py +0 -2
- pyobo/api/alts.py +6 -7
- pyobo/api/hierarchy.py +14 -15
- pyobo/api/metadata.py +3 -4
- pyobo/api/names.py +51 -31
- pyobo/api/properties.py +6 -7
- pyobo/api/relations.py +12 -11
- pyobo/api/species.py +5 -6
- pyobo/api/typedefs.py +1 -3
- pyobo/api/utils.py +63 -2
- pyobo/api/xrefs.py +4 -5
- pyobo/aws.py +3 -5
- pyobo/cli/__init__.py +0 -2
- pyobo/cli/aws.py +0 -2
- pyobo/cli/cli.py +0 -4
- pyobo/cli/database.py +1 -3
- pyobo/cli/lookup.py +2 -4
- pyobo/cli/utils.py +0 -2
- pyobo/constants.py +0 -3
- pyobo/getters.py +19 -26
- pyobo/gilda_utils.py +28 -8
- pyobo/identifier_utils.py +32 -15
- pyobo/mocks.py +5 -6
- pyobo/normalizer.py +24 -24
- pyobo/obographs.py +3 -3
- pyobo/plugins.py +3 -4
- pyobo/py.typed +0 -0
- pyobo/reader.py +19 -21
- pyobo/registries/__init__.py +0 -2
- pyobo/registries/metaregistry.py +6 -8
- pyobo/resource_utils.py +1 -3
- pyobo/resources/__init__.py +0 -2
- pyobo/resources/ncbitaxon.py +2 -3
- pyobo/resources/ro.py +2 -4
- pyobo/sources/README.md +15 -0
- pyobo/sources/__init__.py +2 -2
- pyobo/sources/agrovoc.py +3 -3
- pyobo/sources/antibodyregistry.py +4 -5
- pyobo/sources/biogrid.py +7 -7
- pyobo/sources/ccle.py +3 -4
- pyobo/sources/cgnc.py +1 -3
- pyobo/sources/chebi.py +2 -4
- pyobo/sources/chembl.py +1 -3
- pyobo/sources/civic_gene.py +2 -3
- pyobo/sources/complexportal.py +3 -5
- pyobo/sources/conso.py +2 -4
- pyobo/sources/cpt.py +1 -3
- pyobo/sources/credit.py +68 -0
- pyobo/sources/cvx.py +1 -3
- pyobo/sources/depmap.py +3 -4
- pyobo/sources/dictybase_gene.py +1 -3
- pyobo/sources/drugbank.py +6 -7
- pyobo/sources/drugbank_salt.py +3 -4
- pyobo/sources/drugcentral.py +5 -7
- pyobo/sources/expasy.py +11 -12
- pyobo/sources/famplex.py +3 -5
- pyobo/sources/flybase.py +2 -4
- pyobo/sources/geonames.py +28 -10
- pyobo/sources/gmt_utils.py +5 -6
- pyobo/sources/go.py +4 -6
- pyobo/sources/gwascentral_phenotype.py +1 -3
- pyobo/sources/gwascentral_study.py +2 -3
- pyobo/sources/hgnc.py +8 -9
- pyobo/sources/hgncgenefamily.py +2 -4
- pyobo/sources/icd10.py +3 -4
- pyobo/sources/icd11.py +3 -4
- pyobo/sources/icd_utils.py +6 -7
- pyobo/sources/interpro.py +3 -5
- pyobo/sources/itis.py +1 -3
- pyobo/sources/kegg/__init__.py +0 -2
- pyobo/sources/kegg/api.py +3 -4
- pyobo/sources/kegg/genes.py +3 -4
- pyobo/sources/kegg/genome.py +1 -3
- pyobo/sources/kegg/pathway.py +5 -6
- pyobo/sources/mesh.py +19 -21
- pyobo/sources/mgi.py +1 -3
- pyobo/sources/mirbase.py +4 -6
- pyobo/sources/mirbase_constants.py +0 -2
- pyobo/sources/mirbase_family.py +1 -3
- pyobo/sources/mirbase_mature.py +1 -3
- pyobo/sources/msigdb.py +4 -5
- pyobo/sources/ncbigene.py +3 -5
- pyobo/sources/npass.py +2 -4
- pyobo/sources/omim_ps.py +1 -3
- pyobo/sources/pathbank.py +3 -5
- pyobo/sources/pfam.py +1 -3
- pyobo/sources/pfam_clan.py +1 -3
- pyobo/sources/pid.py +3 -5
- pyobo/sources/pombase.py +1 -3
- pyobo/sources/pubchem.py +5 -6
- pyobo/sources/reactome.py +2 -4
- pyobo/sources/rgd.py +3 -4
- pyobo/sources/rhea.py +9 -10
- pyobo/sources/ror.py +69 -22
- pyobo/sources/selventa/__init__.py +0 -2
- pyobo/sources/selventa/schem.py +1 -3
- pyobo/sources/selventa/scomp.py +1 -3
- pyobo/sources/selventa/sdis.py +1 -3
- pyobo/sources/selventa/sfam.py +1 -3
- pyobo/sources/sgd.py +1 -3
- pyobo/sources/slm.py +1 -3
- pyobo/sources/umls/__init__.py +0 -2
- pyobo/sources/umls/__main__.py +0 -2
- pyobo/sources/umls/get_synonym_types.py +1 -1
- pyobo/sources/umls/umls.py +2 -4
- pyobo/sources/uniprot/__init__.py +0 -2
- pyobo/sources/uniprot/uniprot.py +6 -6
- pyobo/sources/uniprot/uniprot_ptm.py +6 -5
- pyobo/sources/utils.py +3 -5
- pyobo/sources/wikipathways.py +1 -3
- pyobo/sources/zfin.py +2 -3
- pyobo/ssg/__init__.py +3 -2
- pyobo/struct/__init__.py +0 -2
- pyobo/struct/reference.py +13 -15
- pyobo/struct/struct.py +106 -99
- pyobo/struct/typedef.py +19 -10
- pyobo/struct/utils.py +0 -2
- pyobo/utils/__init__.py +0 -2
- pyobo/utils/cache.py +14 -6
- pyobo/utils/io.py +9 -10
- pyobo/utils/iter.py +5 -6
- pyobo/utils/misc.py +1 -3
- pyobo/utils/ndex_utils.py +6 -7
- pyobo/utils/path.py +5 -5
- pyobo/version.py +3 -5
- pyobo/xrefdb/__init__.py +0 -2
- pyobo/xrefdb/canonicalizer.py +27 -18
- pyobo/xrefdb/priority.py +0 -2
- pyobo/xrefdb/sources/__init__.py +9 -7
- pyobo/xrefdb/sources/biomappings.py +0 -2
- pyobo/xrefdb/sources/cbms2019.py +0 -2
- pyobo/xrefdb/sources/chembl.py +5 -7
- pyobo/xrefdb/sources/compath.py +1 -3
- pyobo/xrefdb/sources/famplex.py +3 -5
- pyobo/xrefdb/sources/gilda.py +0 -2
- pyobo/xrefdb/sources/intact.py +5 -5
- pyobo/xrefdb/sources/ncit.py +1 -3
- pyobo/xrefdb/sources/pubchem.py +2 -4
- pyobo/xrefdb/sources/wikidata.py +10 -5
- pyobo/xrefdb/xrefs_pipeline.py +15 -16
- {pyobo-0.10.11.dist-info → pyobo-0.11.0.dist-info}/LICENSE +1 -1
- pyobo-0.11.0.dist-info/METADATA +723 -0
- pyobo-0.11.0.dist-info/RECORD +171 -0
- {pyobo-0.10.11.dist-info → pyobo-0.11.0.dist-info}/WHEEL +1 -1
- pyobo-0.11.0.dist-info/entry_points.txt +2 -0
- pyobo/xrefdb/bengo.py +0 -44
- pyobo-0.10.11.dist-info/METADATA +0 -499
- pyobo-0.10.11.dist-info/RECORD +0 -169
- pyobo-0.10.11.dist-info/entry_points.txt +0 -15
- {pyobo-0.10.11.dist-info → pyobo-0.11.0.dist-info}/top_level.txt +0 -0
pyobo/struct/struct.py
CHANGED
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 """Data structures for OBO."""

 import gzip
@@ -7,6 +5,7 @@ import json
 import logging
 import os
 from collections import defaultdict
+from collections.abc import Collection, Iterable, Iterator, Mapping, Sequence
 from dataclasses import dataclass, field
 from datetime import datetime
 from operator import attrgetter
@@ -16,17 +15,8 @@ from typing import (
     Any,
     Callable,
     ClassVar,
-    Collection,
-    Dict,
-    Iterable,
-    Iterator,
-    List,
-    Mapping,
     Optional,
-    Sequence,
-    Set,
     TextIO,
-    Tuple,
     Union,
 )

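The three hunks above drop the Python 2-era coding cookie and repoint the container imports: abstract collection types now come from `collections.abc`, and the removed `typing` aliases (`List`, `Dict`, `Set`, `Tuple`, ...) are replaced throughout the file by the built-in generics of PEP 585. A minimal sketch of the target style, using an illustrative function that is not part of pyobo:

# Illustrative only: builtin generics + collections.abc, the style this module adopts.
from collections.abc import Iterable, Mapping


def count_by_prefix(curies: Iterable[str]) -> Mapping[str, int]:
    """Count CURIEs per prefix, e.g. {"go": 2} for ["go:0032991", "go:1905571"]."""
    counts: dict[str, int] = {}
    for curie in curies:
        prefix, _, _ = curie.partition(":")
        counts[prefix] = counts.get(prefix, 0) + 1
    return counts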
@@ -56,6 +46,7 @@ from .typedef import (
     term_replaced_by,
 )
 from .utils import comma_separate, obo_escape_slim
+from ..api.utils import get_version
 from ..constants import (
     DATE_FORMAT,
     NCBITAXON_PREFIX,
@@ -77,6 +68,8 @@ __all__ = [
     "Term",
     "Obo",
     "make_ad_hoc_ontology",
+    "abbreviation",
+    "acronym",
 ]

 logger = logging.getLogger(__name__)
@@ -101,7 +94,7 @@ class Synonym:
     )

     #: References to articles where the synonym appears
-    provenance: List[Reference] = field(default_factory=list)
+    provenance: list[Reference] = field(default_factory=list)

     def to_obo(self) -> str:
         """Write this synonym as an OBO line to appear in a [Term] stanza."""
@@ -165,7 +158,7 @@ abbreviation = SynonymTypeDef(
 acronym = SynonymTypeDef(reference=Reference(prefix="omo", identifier="0003012", name="acronym"))


-ReferenceHint = Union[Reference, "Term", Tuple[str, str], str]
+ReferenceHint = Union[Reference, "Term", tuple[str, str], str]


 def _ensure_ref(reference: ReferenceHint) -> Reference:
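`ReferenceHint` keeps its four accepted shapes but is now written with the built-in `tuple`: a `Reference`, a `Term`, a `(prefix, identifier)` pair, or a CURIE string, normalized internally by `_ensure_ref`. A hedged illustration of those shapes (the import path is assumed, and a `Term` instance is omitted for brevity):

# Illustrative only: the shapes the ReferenceHint union admits.
from pyobo.struct.struct import Reference, ReferenceHint  # assumed import path

hints: list[ReferenceHint] = [
    Reference(prefix="go", identifier="0032991"),  # an explicit Reference
    ("go", "0032991"),                             # a (prefix, identifier) pair
    "go:0032991",                                  # a CURIE string
]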
@@ -196,26 +189,26 @@ class Term(Referenced):
     definition: Optional[str] = None

     #: References to articles in which the term appears
-    provenance: List[Reference] = field(default_factory=list)
+    provenance: list[Reference] = field(default_factory=list)

     #: Relationships defined by [Typedef] stanzas
-    relationships: Dict[TypeDef, List[Reference]] = field(default_factory=lambda: defaultdict(list))
+    relationships: dict[TypeDef, list[Reference]] = field(default_factory=lambda: defaultdict(list))

     #: Properties, which are not defined with Typedef and have scalar values instead of references.
-    properties: Dict[str, List[str]] = field(default_factory=lambda: defaultdict(list))
+    properties: dict[str, list[str]] = field(default_factory=lambda: defaultdict(list))

     #: Relationships with the default "is_a"
-    parents: List[Reference] = field(default_factory=list)
+    parents: list[Reference] = field(default_factory=list)

     #: Synonyms of this term
-    synonyms: List[Synonym] = field(default_factory=list)
+    synonyms: list[Synonym] = field(default_factory=list)

     #: Equivalent references
-    xrefs: List[Reference] = field(default_factory=list)
-    xref_types: List[Reference] = field(default_factory=list)
+    xrefs: list[Reference] = field(default_factory=list)
+    xref_types: list[Reference] = field(default_factory=list)

     #: Alternate Identifiers
-    alt_ids: List[Reference] = field(default_factory=list)
+    alt_ids: list[Reference] = field(default_factory=list)

     #: The sub-namespace within the ontology
     namespace: Optional[str] = None
@@ -225,7 +218,7 @@ class Term(Referenced):

     type: Literal["Term", "Instance"] = "Term"

-    def __hash__(self):
+    def __hash__(self):
         return hash((self.__class__, self.prefix, self.identifier))

     @classmethod
@@ -318,7 +311,7 @@ class Term(Referenced):
             raise ValueError("can not append a collection of parents containing a null parent")
         self.parents.extend(references)

-    def get_properties(self, prop) -> List[str]:
+    def get_properties(self, prop) -> list[str]:
         """Get properties from the given key."""
         return self.properties[prop]

@@ -340,7 +333,7 @@ class Term(Referenced):
             raise ValueError
         return r[0]

-    def get_relationships(self, typedef: TypeDef) -> List[Reference]:
+    def get_relationships(self, typedef: TypeDef) -> list[Reference]:
         """Get relationships from the given type."""
         return self.relationships[typedef]

@@ -396,16 +389,17 @@ class Term(Referenced):
         self.properties[prop].append(value)

     def _definition_fp(self) -> str:
-
+        if self.definition is None:
+            raise AssertionError
         return f'"{obo_escape_slim(self.definition)}" [{comma_separate(self.provenance)}]'

-    def iterate_relations(self) -> Iterable[Tuple[TypeDef, Reference]]:
+    def iterate_relations(self) -> Iterable[tuple[TypeDef, Reference]]:
         """Iterate over pairs of typedefs and targets."""
         for typedef, targets in sorted(self.relationships.items(), key=_sort_relations):
             for target in sorted(targets, key=lambda ref: ref.preferred_curie):
                 yield typedef, target

-    def iterate_properties(self) -> Iterable[Tuple[str, str]]:
+    def iterate_properties(self) -> Iterable[tuple[str, str]]:
         """Iterate over pairs of property and values."""
         for prop, values in sorted(self.properties.items()):
             for value in sorted(values):
@@ -467,7 +461,7 @@ class Term(Referenced):


 #: A set of warnings, used to make sure we don't show the same one over and over
-_TYPEDEF_WARNINGS: Set[Tuple[str, str]] = set()
+_TYPEDEF_WARNINGS: set[tuple[str, str]] = set()


 def _sort_relations(r):
@@ -486,6 +480,8 @@ def _sort_properties(r):


 class BioregistryError(ValueError):
+    """An error raised for non-canonical prefixes."""
+
     def __str__(self) -> str:
         return dedent(
             f"""
@@ -515,10 +511,10 @@ class Obo:
     format_version: ClassVar[str] = "1.2"

     #: Type definitions
-    typedefs: ClassVar[Optional[List[TypeDef]]] = None
+    typedefs: ClassVar[Optional[list[TypeDef]]] = None

     #: Synonym type definitions
-    synonym_typedefs: ClassVar[Optional[List[SynonymTypeDef]]] = None
+    synonym_typedefs: ClassVar[Optional[list[SynonymTypeDef]]] = None

     #: An annotation about how an ontology was generated
     auto_generated_by: ClassVar[Optional[str]] = None
@@ -538,7 +534,7 @@ class Obo:
     bioversions_key: ClassVar[Optional[str]] = None

     #: Root terms to use for the ontology
-    root_terms: ClassVar[Optional[List[Reference]]] = None
+    root_terms: ClassVar[Optional[list[Reference]]] = None

     #: The date the ontology was generated
     date: Optional[datetime] = field(default_factory=datetime.today)
@@ -552,7 +548,7 @@ class Obo:
     #: The hierarchy of terms
     _hierarchy: Optional[nx.DiGraph] = field(init=False, default=None, repr=False)
     #: A cache of terms
-    _items: Optional[List[Term]] = field(init=False, default=None, repr=False)
+    _items: Optional[list[Term]] = field(init=False, default=None, repr=False)

     term_sort_key: ClassVar[Optional[Callable[["Obo", Term], int]]] = None

@@ -583,13 +579,11 @@ class Obo:

     def _get_version(self) -> Optional[str]:
         if self.bioversions_key:
-            import bioversions
-
             try:
-                return bioversions.get_version(self.bioversions_key)
+                return get_version(self.bioversions_key)
             except KeyError:
                 logger.warning(f"[{self.bioversions_key}] bioversions doesn't list this resource ")
-            except IOError:
+            except OSError:
                 logger.warning(f"[{self.bioversions_key}] error while looking up version")
         return None

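`_get_version` now routes through `get_version` from `pyobo.api.utils` (also expanded in this release) instead of importing `bioversions` inline, and catches `OSError` rather than the legacy alias. A sketch of the same calling pattern outside the class, assuming `get_version(key)` returns an optional version string and raises `KeyError`/`OSError` as the hunk implies:

# Sketch only: mirrors the error handling shown in the hunk above.
# Assumes pyobo.api.utils.get_version(key) -> Optional[str], raising KeyError
# for unknown keys and OSError on lookup failures.
import logging
from typing import Optional

from pyobo.api.utils import get_version

logger = logging.getLogger(__name__)


def lookup_version(bioversions_key: str) -> Optional[str]:
    """Look up a resource version, returning None when it cannot be resolved."""
    try:
        return get_version(bioversions_key)
    except KeyError:
        logger.warning("[%s] bioversions doesn't list this resource", bioversions_key)
    except OSError:
        logger.warning("[%s] error while looking up version", bioversions_key)
    return None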
@@ -660,13 +654,14 @@ class Obo:
     def iterate_obo_lines(self) -> Iterable[str]:
         """Iterate over the lines to write in an OBO file."""
         yield f"format-version: {self.format_version}"
-        yield f"date: {self.date_formatted}"

         if self.auto_generated_by is not None:
             yield f"auto-generated-by: {self.auto_generated_by}"

         if self.data_version is not None:
             yield f"data-version: {self.data_version}"
+        else:
+            yield f"date: {self.date_formatted}"

         for prefix, url in sorted((self.idspaces or {}).items()):
             yield f"idspace: {prefix} {url}"
@@ -715,7 +710,7 @@ class Obo:
     @staticmethod
     def _write_lines(it, file: Optional[TextIO]):
         for line in it:
-            print(line, file=file)
+            print(line, file=file)

     def write_obonet_gz(self, path: Union[str, Path]) -> None:
         """Write the OBO to a gzipped dump in Obonet JSON."""
@@ -894,16 +889,16 @@ class Obo:
             self._items = sorted(self.iter_terms(force=self.force), key=key)
         return self._items

-    def __iter__(self) -> Iterator["Term"]:
+    def __iter__(self) -> Iterator["Term"]:
         if self.iter_only:
             return iter(self.iter_terms(force=self.force))
         return iter(self._items_accessor)

-    def ancestors(self, identifier: str) -> Set[str]:
+    def ancestors(self, identifier: str) -> set[str]:
         """Return a set of identifiers for parents of the given identifier."""
         return nx.descendants(self.hierarchy, identifier)  # note this is backwards

-    def descendants(self, identifier: str) -> Set[str]:
+    def descendants(self, identifier: str) -> set[str]:
         """Return a set of identifiers for the children of the given identifier."""
         return nx.ancestors(self.hierarchy, identifier)  # note this is backwards

@@ -913,11 +908,12 @@ class Obo:
         .. code-block:: python

             from pyobo import get_obo
-            obo = get_obo('go')

-
-
-
+            obo = get_obo("go")
+
+            interleukin_10_complex = "1905571"  # interleukin-10 receptor complex
+            all_complexes = "0032991"
+            assert obo.is_descendant("1905571", "0032991")
         """
         return ancestor in self.ancestors(descendant)

@@ -930,11 +926,12 @@ class Obo:
         .. code-block:: python

             from pyobo import get_obo
-            obo = get_obo('go')

-
-
-
+            obo = get_obo("go")
+
+            identifier = "1905571"  # interleukin-10 receptor complex
+            is_complex = "0032991" in nx.descendants(obo.hierarchy, identifier)  # should be true
+        """
         if self._hierarchy is None:
             self._hierarchy = nx.DiGraph()
             for term in self._iter_terms(desc=f"[{self.ontology}] getting hierarchy"):
@@ -1005,10 +1002,10 @@ class Obo:

     def get_metadata(self) -> Mapping[str, Any]:
         """Get metadata."""
-        return
-            version
-            date
-
+        return {
+            "version": self.data_version,
+            "date": self.date and self.date.isoformat(),
+        }

     def iterate_ids(self, *, use_tqdm: bool = False) -> Iterable[str]:
         """Iterate over identifiers."""
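`get_metadata` now returns a plain dict keyed by `version` and `date`. A short usage sketch, reusing the HGNC accessor that the docstring examples further down already import:

# Usage sketch of the new return shape; the keys come straight from the hunk above.
from pyobo.sources.hgnc import get_obo  # as in the docstring examples below

obo = get_obo()
metadata = obo.get_metadata()
print(metadata["version"])  # the ontology's data_version (may be None)
print(metadata["date"])     # ISO 8601 string from the date field, or None if unset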
@@ -1016,11 +1013,11 @@ class Obo:
             if term.prefix == self.ontology:
                 yield term.identifier

-    def get_ids(self, *, use_tqdm: bool = False) -> Set[str]:
+    def get_ids(self, *, use_tqdm: bool = False) -> set[str]:
         """Get the set of identifiers."""
         return set(self.iterate_ids(use_tqdm=use_tqdm))

-    def iterate_id_name(self, *, use_tqdm: bool = False) -> Iterable[Tuple[str, str]]:
+    def iterate_id_name(self, *, use_tqdm: bool = False) -> Iterable[tuple[str, str]]:
         """Iterate identifier name pairs."""
         for term in self._iter_terms(use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting names"):
             if term.prefix == self.ontology and term.name:
@@ -1030,19 +1027,23 @@ class Obo:
         """Get a mapping from identifiers to names."""
         return dict(self.iterate_id_name(use_tqdm=use_tqdm))

-    def iterate_id_definition(self, *, use_tqdm: bool = False) -> Iterable[Tuple[str, str]]:
+    def iterate_id_definition(self, *, use_tqdm: bool = False) -> Iterable[tuple[str, str]]:
         """Iterate over pairs of terms' identifiers and their respective definitions."""
         for term in self._iter_terms(use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting names"):
             if term.identifier and term.definition:
-                yield
-
-
+                yield (
+                    term.identifier,
+                    term.definition.strip('"')
+                    .replace("\n", " ")
+                    .replace("\t", " ")
+                    .replace("  ", " "),
+                )

     def get_id_definition_mapping(self, *, use_tqdm: bool = False) -> Mapping[str, str]:
         """Get a mapping from identifiers to definitions."""
         return dict(self.iterate_id_definition(use_tqdm=use_tqdm))

-    def get_obsolete(self, *, use_tqdm: bool = False) -> Set[str]:
+    def get_obsolete(self, *, use_tqdm: bool = False) -> set[str]:
         """Get the set of obsolete identifiers."""
         return {
             term.identifier
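`iterate_id_definition` now cleans each definition before yielding it: surrounding double quotes are stripped, and newlines, tabs, and doubled spaces are collapsed to single spaces. A standalone reproduction of that cleanup on an invented definition string:

# Standalone reproduction of the normalization applied in the hunk above;
# the sample text is invented for illustration.
definition = '"A complex\n\tof interleukin-10  receptors."'
cleaned = (
    definition.strip('"')
    .replace("\n", " ")
    .replace("\t", " ")
    .replace("  ", " ")
)
print(cleaned)  # -> A complex of interleukin-10 receptors.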
@@ -1058,7 +1059,7 @@ class Obo:

     def iterate_id_species(
         self, *, prefix: Optional[str] = None, use_tqdm: bool = False
-    ) -> Iterable[Tuple[str, str]]:
+    ) -> Iterable[tuple[str, str]]:
         """Iterate over terms' identifiers and respective species (if available)."""
         if prefix is None:
             prefix = NCBITAXON_PREFIX
@@ -1085,7 +1086,7 @@ class Obo:
         ]
         return pd.DataFrame(rows, columns=["prefix", "identifier", "name"])

-    def iter_typedef_id_name(self) -> Iterable[Tuple[str, str]]:
+    def iter_typedef_id_name(self) -> Iterable[tuple[str, str]]:
         """Iterate over typedefs' identifiers and their respective names."""
         for typedef in self.typedefs or []:
             yield typedef.identifier, typedef.name
@@ -1098,7 +1099,7 @@ class Obo:
     # PROPS #
     #########

-    def iterate_properties(self, *, use_tqdm: bool = False) -> Iterable[Tuple[Term, str, str]]:
+    def iterate_properties(self, *, use_tqdm: bool = False) -> Iterable[tuple[Term, str, str]]:
         """Iterate over tuples of terms, properties, and their values."""
         # TODO if property_prefix is set, try removing that as a prefix from all prop strings.
         for term in self._iter_terms(
@@ -1109,10 +1110,10 @@ class Obo:

     @property
     def properties_header(self):
-        """Property dataframe header."""
+        """Property dataframe header."""
         return [f"{self.ontology}_id", "property", "value"]

-    def iter_property_rows(self, *, use_tqdm: bool = False) -> Iterable[Tuple[str, str, str]]:
+    def iter_property_rows(self, *, use_tqdm: bool = False) -> Iterable[tuple[str, str, str]]:
         """Iterate property rows."""
         for term, prop, value in self.iterate_properties(use_tqdm=use_tqdm):
             yield term.identifier, prop, value
@@ -1126,7 +1127,7 @@ class Obo:

     def iterate_filtered_properties(
         self, prop: str, *, use_tqdm: bool = False
-    ) -> Iterable[Tuple[Term, str]]:
+    ) -> Iterable[tuple[Term, str]]:
         """Iterate over tuples of terms and the values for the given property."""
         for term in self._iter_terms(use_tqdm=use_tqdm):
             for _prop, value in term.iterate_properties():
@@ -1154,7 +1155,7 @@ class Obo:

     def get_filtered_properties_multimapping(
         self, prop: str, *, use_tqdm: bool = False
-    ) -> Mapping[str, List[str]]:
+    ) -> Mapping[str, list[str]]:
         """Get a mapping from a term's identifier to the property values."""
         return multidict(
             (term.identifier, value)
@@ -1167,7 +1168,7 @@ class Obo:

     def iterate_relations(
         self, *, use_tqdm: bool = False
-    ) -> Iterable[Tuple[Term, TypeDef, Reference]]:
+    ) -> Iterable[tuple[Term, TypeDef, Reference]]:
         """Iterate over tuples of terms, relations, and their targets."""
         for term in self._iter_terms(
             use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting relations"
@@ -1184,17 +1185,23 @@ class Obo:

     def iter_relation_rows(
         self, use_tqdm: bool = False
-    ) -> Iterable[Tuple[str, str, str, str, str]]:
+    ) -> Iterable[tuple[str, str, str, str, str]]:
         """Iterate the relations' rows."""
         for term, typedef, reference in self.iterate_relations(use_tqdm=use_tqdm):
-            yield
+            yield (
+                term.identifier,
+                typedef.prefix,
+                typedef.identifier,
+                reference.prefix,
+                reference.identifier,
+            )

     def iterate_filtered_relations(
         self,
         relation: RelationHint,
         *,
         use_tqdm: bool = False,
-    ) -> Iterable[Tuple[Term, Reference]]:
+    ) -> Iterable[tuple[Term, Reference]]:
         """Iterate over tuples of terms and ther targets for the given relation."""
         _target_prefix, _target_identifier = get_reference_tuple(relation)
         for term, typedef, reference in self.iterate_relations(use_tqdm=use_tqdm):
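`iter_relation_rows` now yields an explicit five-element tuple per relation, matching the columns in `relations_header` (next hunk). A small illustration of how one row lines up with those columns, reusing the MAPT orthology values from the docstrings below:

# Illustrative only: one row as yielded by iter_relation_rows, aligned with
# relations_header = [f"{ontology}_id", RELATION_PREFIX, RELATION_ID, TARGET_PREFIX, TARGET_ID].
row = ("6893", "ro", "HOM0000017", "mgi", "97180")  # HGNC MAPT -> MGI Mapt orthology
term_id, relation_prefix, relation_id, target_prefix, target_id = row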
@@ -1203,7 +1210,7 @@ class Obo:

     @property
     def relations_header(self) -> Sequence[str]:
-        """Header for the relations dataframe."""
+        """Header for the relations dataframe."""
         return [f"{self.ontology}_id", RELATION_PREFIX, RELATION_ID, TARGET_PREFIX, TARGET_ID]

     def get_relations_df(self, *, use_tqdm: bool = False) -> pd.DataFrame:
@@ -1234,7 +1241,7 @@ class Obo:
         target_prefix: str,
         *,
         use_tqdm: bool = False,
-    ) -> Iterable[Tuple[Term, Reference]]:
+    ) -> Iterable[tuple[Term, Reference]]:
         """Iterate over relationships between one identifier and another."""
         for term, reference in self.iterate_filtered_relations(
             relation=relation, use_tqdm=use_tqdm
@@ -1257,9 +1264,9 @@ class Obo:

         >>> from pyobo.sources.hgnc import get_obo
         >>> obo = get_obo()
-        >>> human_mapt_hgnc_id =
-        >>> mouse_mapt_mgi_id =
-        >>> hgnc_mgi_orthology_mapping = obo.get_relation_mapping(
+        >>> human_mapt_hgnc_id = "6893"
+        >>> mouse_mapt_mgi_id = "97180"
+        >>> hgnc_mgi_orthology_mapping = obo.get_relation_mapping("ro:HOM0000017", "mgi")
         >>> assert mouse_mapt_mgi_id == hgnc_mgi_orthology_mapping[human_mapt_hgnc_id]
         """
         return {
@@ -1283,9 +1290,9 @@ class Obo:

         >>> from pyobo.sources.hgnc import get_obo
         >>> obo = get_obo()
-        >>> human_mapt_hgnc_id =
-        >>> mouse_mapt_mgi_id =
-        >>> assert mouse_mapt_mgi_id == obo.get_relation(human_mapt_hgnc_id,
+        >>> human_mapt_hgnc_id = "6893"
+        >>> mouse_mapt_mgi_id = "97180"
+        >>> assert mouse_mapt_mgi_id == obo.get_relation(human_mapt_hgnc_id, "ro:HOM0000017", "mgi")
         """
         relation_mapping = self.get_relation_mapping(
             relation=relation, target_prefix=target_prefix, use_tqdm=use_tqdm
@@ -1298,7 +1305,7 @@ class Obo:
         target_prefix: str,
         *,
         use_tqdm: bool = False,
-    ) -> Mapping[str, List[str]]:
+    ) -> Mapping[str, list[str]]:
         """Get a mapping from the term's identifier to the target's identifiers."""
         return multidict(
             (term.identifier, reference.identifier)
@@ -1314,7 +1321,7 @@ class Obo:
         typedef: TypeDef,
         *,
         use_tqdm: bool = False,
-    ) -> Mapping[str, List[Reference]]:
+    ) -> Mapping[str, list[Reference]]:
         """Get a mapping from identifiers to a list of all references for the given relation."""
         return multidict(
             (term.identifier, reference)
@@ -1328,18 +1335,18 @@ class Obo:
     # SYNONYMS #
     ############

-    def iterate_synonyms(self, *, use_tqdm: bool = False) -> Iterable[Tuple[Term, Synonym]]:
+    def iterate_synonyms(self, *, use_tqdm: bool = False) -> Iterable[tuple[Term, Synonym]]:
         """Iterate over pairs of term and synonym object."""
         for term in self._iter_terms(use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting synonyms"):
             for synonym in sorted(term.synonyms, key=attrgetter("name")):
                 yield term, synonym

-    def iterate_synonym_rows(self, *, use_tqdm: bool = False) -> Iterable[Tuple[str, str]]:
+    def iterate_synonym_rows(self, *, use_tqdm: bool = False) -> Iterable[tuple[str, str]]:
         """Iterate over pairs of identifier and synonym text."""
         for term, synonym in self.iterate_synonyms(use_tqdm=use_tqdm):
             yield term.identifier, synonym.name

-    def get_id_synonyms_mapping(self, *, use_tqdm: bool = False) -> Mapping[str, List[str]]:
+    def get_id_synonyms_mapping(self, *, use_tqdm: bool = False) -> Mapping[str, list[str]]:
         """Get a mapping from identifiers to a list of sorted synonym strings."""
         return multidict(self.iterate_synonym_rows(use_tqdm=use_tqdm))

@@ -1347,7 +1354,7 @@ class Obo:
     # XREFS #
     #########

-    def iterate_xrefs(self, *, use_tqdm: bool = False) -> Iterable[Tuple[Term, Reference]]:
+    def iterate_xrefs(self, *, use_tqdm: bool = False) -> Iterable[tuple[Term, Reference]]:
         """Iterate over xrefs."""
         for term in self._iter_terms(use_tqdm=use_tqdm, desc=f"[{self.ontology}] getting xrefs"):
             for xref in term.xrefs:
@@ -1355,20 +1362,20 @@ class Obo:

     def iterate_filtered_xrefs(
         self, prefix: str, *, use_tqdm: bool = False
-    ) -> Iterable[Tuple[Term, Reference]]:
+    ) -> Iterable[tuple[Term, Reference]]:
         """Iterate over xrefs to a given prefix."""
         for term, xref in self.iterate_xrefs(use_tqdm=use_tqdm):
             if xref.prefix == prefix:
                 yield term, xref

-    def iterate_xref_rows(self, *, use_tqdm: bool = False) -> Iterable[Tuple[str, str, str]]:
+    def iterate_xref_rows(self, *, use_tqdm: bool = False) -> Iterable[tuple[str, str, str]]:
         """Iterate over terms' identifiers, xref prefixes, and xref identifiers."""
         for term, xref in self.iterate_xrefs(use_tqdm=use_tqdm):
             yield term.identifier, xref.prefix, xref.identifier

     @property
     def xrefs_header(self):
-        """The header for the xref dataframe."""
+        """The header for the xref dataframe."""
         return [f"{self.ontology}_id", TARGET_PREFIX, TARGET_ID]

     def get_xrefs_df(self, *, use_tqdm: bool = False) -> pd.DataFrame:
@@ -1389,7 +1396,7 @@ class Obo:

     def get_filtered_multixrefs_mapping(
         self, prefix: str, *, use_tqdm: bool = False
-    ) -> Mapping[str, List[str]]:
+    ) -> Mapping[str, list[str]]:
         """Get filtered xrefs as a dictionary."""
         return multidict(
             (term.identifier, xref.identifier)
@@ -1400,18 +1407,18 @@ class Obo:
     # ALTS #
     ########

-    def iterate_alts(self) -> Iterable[Tuple[Term, Reference]]:
+    def iterate_alts(self) -> Iterable[tuple[Term, Reference]]:
         """Iterate over alternative identifiers."""
         for term in self:
             for alt in term.alt_ids:
                 yield term, alt

-    def iterate_alt_rows(self) -> Iterable[Tuple[str, str]]:
+    def iterate_alt_rows(self) -> Iterable[tuple[str, str]]:
         """Iterate over pairs of terms' primary identifiers and alternate identifiers."""
         for term, alt in self.iterate_alts():
             yield term.identifier, alt.identifier

-    def get_id_alts_mapping(self) -> Mapping[str, List[str]]:
+    def get_id_alts_mapping(self) -> Mapping[str, list[str]]:
         """Get a mapping from identifiers to a list of alternative identifiers."""
         return multidict((term.identifier, alt.identifier) for term, alt in self.iterate_alts())

@@ -1421,14 +1428,14 @@ def make_ad_hoc_ontology(
     _name: str,
     _auto_generated_by: Optional[str] = None,
     _format_version: str = "1.2",
-    _typedefs: Optional[List[TypeDef]] = None,
-    _synonym_typedefs: Optional[List[SynonymTypeDef]] = None,
+    _typedefs: Optional[list[TypeDef]] = None,
+    _synonym_typedefs: Optional[list[SynonymTypeDef]] = None,
     _date: Optional[datetime] = None,
     _data_version: Optional[str] = None,
     _idspaces: Optional[Mapping[str, str]] = None,
-    _root_terms: Optional[List[Reference]] = None,
+    _root_terms: Optional[list[Reference]] = None,
     *,
-    terms: List[Term],
+    terms: list[Term],
 ) -> "Obo":
     """Make an ad-hoc ontology."""

@@ -1455,7 +1462,7 @@ def make_ad_hoc_ontology(
     return AdHocOntology()


-def _convert_typedefs(typedefs: Optional[Iterable[TypeDef]]) -> List[Mapping[str, Any]]:
+def _convert_typedefs(typedefs: Optional[Iterable[TypeDef]]) -> list[Mapping[str, Any]]:
     """Convert the type defs."""
     if not typedefs:
         return []
@@ -1465,10 +1472,10 @@ def _convert_typedefs(typedefs: Optional[Iterable[TypeDef]]) -> List[Mapping[str
 def _convert_typedef(typedef: TypeDef) -> Mapping[str, Any]:
     """Convert a type def."""
     # TODO add more later
-    return typedef.reference.
+    return typedef.reference.model_dump()


-def _convert_synonym_typedefs(synonym_typedefs: Optional[Iterable[SynonymTypeDef]]) -> List[str]:
+def _convert_synonym_typedefs(synonym_typedefs: Optional[Iterable[SynonymTypeDef]]) -> list[str]:
     """Convert the synonym type defs."""
     if not synonym_typedefs:
         return []